[Misc] Turn off encoder torch compile by default (#28634)

Signed-off-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Roger Wang 2025-11-13 08:38:08 -08:00 committed by GitHub
parent b230286fbc
commit d3387750f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 11 additions and 5 deletions

View File

@@ -10,8 +10,8 @@ from vllm.platforms import current_platform
 def test_compile():
     vllm_config = VllmConfig()
-    # Default configuration compiles mm encoder
-    assert vllm_config.compilation_config.compile_mm_encoder
+    # Default configuration does not compile mm encoder
+    assert not vllm_config.compilation_config.compile_mm_encoder
 # forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
@@ -39,7 +39,10 @@ def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch):
             "Qwen/Qwen2.5-VL-3B-Instruct",
             max_model_len=2048,
             gpu_memory_utilization=0.8,
-            compilation_config={"mode": CompilationMode.VLLM_COMPILE},
+            compilation_config={
+                "mode": CompilationMode.VLLM_COMPILE,
+                "compile_mm_encoder": True,
+            },
         ) as _,
     ):
         pass

View File

@@ -131,6 +131,7 @@ VLM_TEST_SETTINGS = {
         prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
         img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
         video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
+        enforce_eager=False,
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
@@ -160,6 +161,7 @@ VLM_TEST_SETTINGS = {
             VLMTestType.MULTI_IMAGE,
             VLMTestType.VIDEO,
         ),
+        enforce_eager=False,
         needs_video_metadata=True,
         prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
         img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501

View File

@@ -320,9 +320,10 @@ class CompilationConfig:
     If None, defaults to attention ops for piecewise cudagraphs.
     If empty list [], no ops are excluded (suitable for full cudagraphs)."""
-    compile_mm_encoder: bool = True
+    compile_mm_encoder: bool = False
     """Whether or not to compile the multimodal encoder.
-    Currently, this only works for `Qwen2_5_vl`."""
+    Currently, this only works for `Qwen2_5_vl` on selected platforms.
+    Disabled by default until more models are supported/tested to work."""
     # Inductor capture
     use_inductor: bool | None = None