From c0a4b95d6474c72799cd8af4421ce922654c850e Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 6 Nov 2025 20:23:17 -0800 Subject: [PATCH] Fix issues from #28242 (#28257) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/config/compilation.py | 10 +++------- vllm/model_executor/models/qwen2_5_vl.py | 4 +--- vllm/model_executor/models/transformers/utils.py | 11 ----------- vllm/model_executor/models/vision.py | 6 ++++++ 4 files changed, 10 insertions(+), 21 deletions(-) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index b4f5040e338c..c84a060922e3 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -251,13 +251,6 @@ class CompilationConfig: disabled when running with Inductor: mode>=VLLM_COMPILE and use_inductor=True. Inductor generates (fused) Triton kernels for disabled custom ops.""" splitting_ops: list[str] | None = None - - """ - Provide control over whether to compile the multimodal encoder - such as Qwen2_5_vl - """ - compile_mm_encoder: bool = True - """A list of ops to exclude from cudagraphs, used in piecewise compilation. The behavior depends on use_inductor_graph_partition: @@ -275,6 +268,9 @@ class CompilationConfig: If None, defaults to attention ops for piecewise cudagraphs. If empty list [], no ops are excluded (suitable for full cudagraphs).""" + compile_mm_encoder: bool = True + """Whether or not to compile the multimodal encoder. + Currently, this only works for `Qwen2_5_vl`.""" # Inductor capture use_inductor: bool | None = None diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index 7cf76bee2aa6..a90cfe96414b 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -67,9 +67,7 @@ from vllm.model_executor.layers.linear import ( from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.models.transformers.utils import ( - should_torch_compile_mm_vit, -) +from vllm.model_executor.models.vision import should_torch_compile_mm_vit from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.evs import ( compute_mrope_for_media, diff --git a/vllm/model_executor/models/transformers/utils.py b/vllm/model_executor/models/transformers/utils.py index 8f09137190fe..267a6e06e6bb 100644 --- a/vllm/model_executor/models/transformers/utils.py +++ b/vllm/model_executor/models/transformers/utils.py @@ -205,14 +205,3 @@ def can_enable_torch_compile(vllm_config: "VllmConfig") -> bool: # Dynamic rope scaling is not compatible with torch.compile rope_scaling: dict = getattr(text_config, "rope_scaling", None) or {} return rope_scaling.get("rope_type") != "dynamic" - - -def should_torch_compile_mm_vit(vllm_config: "VllmConfig") -> bool: - """ - Callable to be passed to `@support_torch_compile`'s `enable_if` argument. - - Defaults to `True` but is disabled in the following situations: - - - The model uses dynamic rope scaling. - """ - return vllm_config.compilation_config.compile_mm_encoder diff --git a/vllm/model_executor/models/vision.py b/vllm/model_executor/models/vision.py index b5f6c60514c0..9f94387c700d 100644 --- a/vllm/model_executor/models/vision.py +++ b/vllm/model_executor/models/vision.py @@ -11,6 +11,7 @@ import torch from transformers import PretrainedConfig from vllm.attention.backends.registry import _Backend +from vllm.config import VllmConfig from vllm.distributed import ( get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, @@ -100,6 +101,11 @@ def get_vit_attn_backend( return current_platform.get_vit_attn_backend(head_size, dtype) +def should_torch_compile_mm_vit(vllm_config: VllmConfig) -> bool: + """Callable to be passed to `@support_torch_compile`'s `enable_if` argument.""" + return vllm_config.compilation_config.compile_mm_encoder + + VisionFeatureSelectStrategyStr = Literal["class", "default", "full"] VisionFeatureSelectStrategy: TypeAlias = (