mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 03:44:58 +08:00
[Misc] Turn off encoder torch compile by default (#28634)
Signed-off-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
parent
b230286fbc
commit
d3387750f1
@ -10,8 +10,8 @@ from vllm.platforms import current_platform
|
||||
|
||||
def test_compile():
|
||||
vllm_config = VllmConfig()
|
||||
# Default configuration compiles mm encoder
|
||||
assert vllm_config.compilation_config.compile_mm_encoder
|
||||
# Default configuration does not compile mm encoder
|
||||
assert not vllm_config.compilation_config.compile_mm_encoder
|
||||
|
||||
|
||||
# forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
|
||||
@ -39,7 +39,10 @@ def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch):
|
||||
"Qwen/Qwen2.5-VL-3B-Instruct",
|
||||
max_model_len=2048,
|
||||
gpu_memory_utilization=0.8,
|
||||
compilation_config={"mode": CompilationMode.VLLM_COMPILE},
|
||||
compilation_config={
|
||||
"mode": CompilationMode.VLLM_COMPILE,
|
||||
"compile_mm_encoder": True,
|
||||
},
|
||||
) as _,
|
||||
):
|
||||
pass
|
||||
|
||||
@ -131,6 +131,7 @@ VLM_TEST_SETTINGS = {
|
||||
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
|
||||
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
|
||||
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
|
||||
enforce_eager=False,
|
||||
max_model_len=4096,
|
||||
max_num_seqs=2,
|
||||
auto_cls=AutoModelForImageTextToText,
|
||||
@ -160,6 +161,7 @@ VLM_TEST_SETTINGS = {
|
||||
VLMTestType.MULTI_IMAGE,
|
||||
VLMTestType.VIDEO,
|
||||
),
|
||||
enforce_eager=False,
|
||||
needs_video_metadata=True,
|
||||
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
|
||||
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
|
||||
|
||||
@ -320,9 +320,10 @@ class CompilationConfig:
|
||||
|
||||
If None, defaults to attention ops for piecewise cudagraphs.
|
||||
If empty list [], no ops are excluded (suitable for full cudagraphs)."""
|
||||
compile_mm_encoder: bool = True
|
||||
compile_mm_encoder: bool = False
|
||||
"""Whether or not to compile the multimodal encoder.
|
||||
Currently, this only works for `Qwen2_5_vl`."""
|
||||
Currently, this only works for `Qwen2_5_vl` on selected platforms.
|
||||
Disabled by default until more models are supported/tested to work."""
|
||||
|
||||
# Inductor capture
|
||||
use_inductor: bool | None = None
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user