[Bugfix] Fix Qwen2.5-VL quantized model weights loading (#23512)

Signed-off-by: Zifei Tong <zifeitong@gmail.com>
2025-12-24 18:45:02 +08:00 · 2025-08-24 19:40:22 -07:00 · 2025-08-24 19:40:22 -07:00 · a71e4765cc
commit a71e4765cc
parent 39971db3aa
1 changed file with 5 additions and 1 deletion
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -135,7 +135,7 @@ class Qwen2_5_VLVideoPixelInputs(TypedDict):

    second_per_grid_ts: torch.Tensor
    """
-    The video time interval (in seconds) for each grid along the temporal 
+    The video time interval (in seconds) for each grid along the temporal
    dimension in the 3D position IDs. Returned when `videos` is not `None`.
    """

@@ -852,6 +852,10 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module, SupportsMultiModal,
                                         SupportsLoRA, SupportsPP,
                                         SupportsQuant):

+    packed_modules_mapping = {
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+
    # To ensure correct weight loading and mapping.
    hf_to_vllm_mapper = WeightsMapper(
        orig_to_new_prefix={