mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 16:35:55 +08:00
[Bugfix] Fix Qwen3VLMoe weight loading when pipeline parallelism (pp) > 1 (#25838)
Signed-off-by: liuye.hj <liuye.hj@alibaba-inc.com>
Co-authored-by: liuye.hj <liuye.hj@alibaba-inc.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
e40c12696a
commit
0b343e3218
@ -212,6 +212,8 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
|
||||
# attempted to load as other weights later
|
||||
is_expert_weight = True
|
||||
name_mapped = name.replace(weight_name, param_name)
|
||||
if is_pp_missing_parameter(name_mapped, self):
|
||||
continue
|
||||
if is_fused_expert:
|
||||
loaded_weight = loaded_weight.transpose(-1,
|
||||
-2) # no bias
|
||||
@ -230,8 +232,6 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
|
||||
name_mapped, params_dict, loaded_weight,
|
||||
shard_id, num_experts)
|
||||
else:
|
||||
if is_pp_missing_parameter(name_mapped, self):
|
||||
continue
|
||||
# Skip loading extra parameters for GPTQ/modelopt models
|
||||
if name_mapped.endswith(
|
||||
ignore_suffixes
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user