[Misc] Turn off encoder torch compile by default (#28634)

Signed-off-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Roger Wang 2025-11-13 08:38:08 -08:00 committed by GitHub
parent b230286fbc
commit d3387750f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 11 additions and 5 deletions

View File

@ -10,8 +10,8 @@ from vllm.platforms import current_platform
def test_compile():
vllm_config = VllmConfig()
# Default configuration compiles mm encoder
assert vllm_config.compilation_config.compile_mm_encoder
# Default configuration does not compile mm encoder
assert not vllm_config.compilation_config.compile_mm_encoder
# forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
@ -39,7 +39,10 @@ def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch):
"Qwen/Qwen2.5-VL-3B-Instruct",
max_model_len=2048,
gpu_memory_utilization=0.8,
compilation_config={"mode": CompilationMode.VLLM_COMPILE},
compilation_config={
"mode": CompilationMode.VLLM_COMPILE,
"compile_mm_encoder": True,
},
) as _,
):
pass

View File

@ -131,6 +131,7 @@ VLM_TEST_SETTINGS = {
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
enforce_eager=False,
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
@ -160,6 +161,7 @@ VLM_TEST_SETTINGS = {
VLMTestType.MULTI_IMAGE,
VLMTestType.VIDEO,
),
enforce_eager=False,
needs_video_metadata=True,
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501

View File

@ -320,9 +320,10 @@ class CompilationConfig:
If None, defaults to attention ops for piecewise cudagraphs.
If empty list [], no ops are excluded (suitable for full cudagraphs)."""
compile_mm_encoder: bool = True
compile_mm_encoder: bool = False
"""Whether or not to compile the multimodal encoder.
Currently, this only works for `Qwen2_5_vl`."""
Currently, this only works for `Qwen2_5_vl` on selected platforms.
Disabled by default until more models are supported/tested to work."""
# Inductor capture
use_inductor: bool | None = None