diff --git a/vllm/lora/layers/column_parallel_linear.py b/vllm/lora/layers/column_parallel_linear.py
index 8273046bb6ecd..904025901fba7 100644
--- a/vllm/lora/layers/column_parallel_linear.py
+++ b/vllm/lora/layers/column_parallel_linear.py
@@ -340,12 +340,7 @@ class QKVParallelLinearWithLoRA(ColumnParallelLinearWithLoRA):
         packed_modules_list: list,
         model_config: PretrainedConfig | None = None,
     ) -> bool:
-        # Vision tower QKV has packed_modules_list=[] (already packed in checkpoint)
-        # Language models have packed_modules_list=[module_name]
-        # (single LoRA for qkv_proj)
-        return type(source_layer) is QKVParallelLinear and (
-            len(packed_modules_list) <= 1
-        )
+        return type(source_layer) is QKVParallelLinear and len(packed_modules_list) == 1
 
 
 class MergedQKVParallelLinearWithLoRA(MergedColumnParallelLinearWithLoRA):
diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 69fac625dde4c..76e2627b65ca6 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -1061,6 +1061,7 @@ class Qwen2_5_VLForConditionalGeneration(
     packed_modules_mapping = {
         "qkv_proj": ["q_proj", "k_proj", "v_proj"],
         "gate_up_proj": ["gate_proj", "up_proj"],
+        "qkv": ["qkv"],  # For vision tower's already-packed QKV
     }
 
     # To ensure correct weight loading and mapping.
diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py
index eca21bad718f0..8a34e6e77faf6 100644
--- a/vllm/model_executor/models/qwen3_vl.py
+++ b/vllm/model_executor/models/qwen3_vl.py
@@ -1201,6 +1201,7 @@ class Qwen3VLForConditionalGeneration(
             "gate_proj",
             "up_proj",
         ],
+        "qkv": ["qkv"],  # For vision tower's already-packed QKV
     }
 
     supports_encoder_tp_data = True