From daec4d2624cb816b92d5463c7f47878a342c7e76 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Fri, 17 Oct 2025 19:47:00 +0800
Subject: [PATCH] [Model] Improve Qwen3VLMoeForConditionalGeneration
 packed_modules_mapping (#27096)

Signed-off-by: Jee Jee Li
---
 vllm/model_executor/models/qwen3_vl_moe.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py
index 21b2e395c77f..284b1301d07f 100644
--- a/vllm/model_executor/models/qwen3_vl_moe.py
+++ b/vllm/model_executor/models/qwen3_vl_moe.py
@@ -350,6 +350,14 @@ class Qwen3MoeLLMForCausalLM(Qwen3MoeForCausalLM):
     dummy_inputs=Qwen3VLDummyInputsBuilder,
 )
 class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+    }
+
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super(Qwen3VLForConditionalGeneration, self).__init__()
         config: Qwen3VLMoeConfig = vllm_config.model_config.hf_config
@@ -376,6 +384,11 @@ class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
         self.language_model = Qwen3MoeLLMForCausalLM(
             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "language_model")
         )
+        # Whether to include the gate_up_proj mapping is determined by
+        # the language model.
+        self.packed_modules_mapping = (
+            self.packed_modules_mapping | self.language_model.packed_modules_mapping
+        )
         self.make_empty_intermediate_tensors = (
             self.language_model.make_empty_intermediate_tensors
         )
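
Note: the merge in __init__ relies on PEP 584 dict union, where keys from the
right-hand operand win on conflict, so the wrapped language model's view of
each packed module takes precedence over the class-level default. Below is a
minimal, standalone sketch of that merge; the variable names and the mapping
contents (vision_side_mapping, language_model_mapping) are illustrative
assumptions, not values copied from Qwen3MoeForCausalLM.

    # Class-level mapping added by this patch: only the attention
    # projections, which the backbone is expected to pack into qkv_proj.
    vision_side_mapping = {
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
    }

    # Hypothetical mapping exposed by the wrapped language model; whether it
    # also packs gate_up_proj depends on the backbone, which is why the
    # patch defers that decision to the language model.
    language_model_mapping = {
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
        "gate_up_proj": ["gate_proj", "up_proj"],
    }

    # PEP 584 dict union: right-hand keys win, so the language model's
    # entries override the class-level defaults on any shared key.
    merged = vision_side_mapping | language_model_mapping
    assert merged["gate_up_proj"] == ["gate_proj", "up_proj"]

With this arrangement, a backbone whose mapping omits gate_up_proj simply
leaves the merged mapping without that entry, while the qkv_proj entry is
always present from the class-level default.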