[Bugfix] Fix Qwen3-VL-MoE weight loading for EP (#25300)

Signed-off-by: Roger Wang <hey@rogerw.io>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Roger Wang 2025-09-20 00:04:05 -07:00 committed by yewentao256
parent 267b4421b7
commit 0ac65d171b


@@ -122,9 +122,10 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
 
     def load_fused_expert_weights(self, name: str, params_dict: dict,
                                   loaded_weight: torch.Tensor, shard_id: str,
-                                  num_experts: int):
+                                  num_experts: int) -> bool:
         param = params_dict[name]
         weight_loader = typing.cast(Callable[..., bool], param.weight_loader)
+        loaded_local_expert = False
         for expert_id in range(num_experts):
             curr_expert_weight = loaded_weight[expert_id]
             success = weight_loader(param,
@@ -133,9 +134,10 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
                                     shard_id,
                                     expert_id,
                                     return_success=True)
-            if not success:
-                return False
-        return True
+            if success:
+                loaded_local_expert = True
+        return loaded_local_expert
 
     def load_weights(self, weights: Iterable[tuple[str,
                                                    torch.Tensor]]) -> set[str]:
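
A reading of the change (not part of the commit message): under expert parallelism (EP), each rank holds only a subset of the experts, so weight_loader(..., return_success=True) can legitimately report failure for experts that are not local to the rank. Returning False on the first such miss made the whole fused expert weight look unloaded; the fix instead reports success once any local expert has been loaded. The standalone sketch below illustrates that difference in aggregation; fake_weight_loader, LOCAL_EXPERTS, load_all_or_fail, and load_any_local are hypothetical names, not vLLM APIs.

# Minimal sketch, not vLLM code: simulates per-expert weight loading under
# expert parallelism (EP). All names here are hypothetical and only
# illustrate the aggregation logic changed by this commit.

NUM_EXPERTS = 8
# Assumption: with EP, each rank owns only a subset of the experts, and the
# loader reports success only for experts that live on this rank.
LOCAL_EXPERTS = {0, 1, 2, 3}


def fake_weight_loader(expert_id: int) -> bool:
    return expert_id in LOCAL_EXPERTS


def load_all_or_fail(num_experts: int) -> bool:
    # Old behavior: bail out as soon as one expert is not loaded locally,
    # so under EP the fused weight is always reported as unloaded.
    for expert_id in range(num_experts):
        if not fake_weight_loader(expert_id):
            return False
    return True


def load_any_local(num_experts: int) -> bool:
    # New behavior: report success if at least one local expert was loaded.
    loaded_local_expert = False
    for expert_id in range(num_experts):
        if fake_weight_loader(expert_id):
            loaded_local_expert = True
    return loaded_local_expert


print(load_all_or_fail(NUM_EXPERTS))  # False: weight wrongly counted as missing
print(load_any_local(NUM_EXPERTS))    # True: weight counted as loaded on this rank

On an EP rank only the experts mapped to that rank can ever succeed, so the any-local criterion matches what the per-rank loader can actually observe, which appears to be the rationale for the new return value.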