[Bugfix] Fix Qwen3-VL-MoE weight loading for EP (#25300)

Signed-off-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Roger Wang 2025-09-20 00:04:05 -07:00 committed by GitHub
parent 9607d5eb44
commit be874c0201
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -122,9 +122,10 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
def load_fused_expert_weights(self, name: str, params_dict: dict,
loaded_weight: torch.Tensor, shard_id: str,
num_experts: int):
num_experts: int) -> bool:
param = params_dict[name]
weight_loader = typing.cast(Callable[..., bool], param.weight_loader)
loaded_local_expert = False
for expert_id in range(num_experts):
curr_expert_weight = loaded_weight[expert_id]
success = weight_loader(param,
@@ -133,9 +134,10 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
shard_id,
expert_id,
return_success=True)
if not success:
return False
return True
if success:
loaded_local_expert = True
return loaded_local_expert
def load_weights(self, weights: Iterable[tuple[str,
torch.Tensor]]) -> set[str]:
@@ -345,4 +347,4 @@ class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
for _ in range(self.deepstack_num_level)
] if self.use_deepstack else None
self.visual_dim = config.vision_config.out_hidden_size
self.multiscale_dim = self.visual_dim * self.deepstack_num_level
self.multiscale_dim = self.visual_dim * self.deepstack_num_level