[Bugfix] Skip loading extra parameters for modelopt Qwen3 MoE model (#19598)

Signed-off-by: noiji <>
This commit is contained in:
noiji 2025-06-30 18:21:56 +09:00 committed by GitHub
parent 022c58b80f
commit f5dfa07531
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -386,6 +386,11 @@ class Qwen3MoeModel(nn.Module):
("gate_up_proj", "up_proj", 1),
]
# Skip loading extra parameters for GPTQ/modelopt models.
ignore_suffixes = (".bias", "_bias", ".k_scale", "_k_scale",
".v_scale", "_v_scale", ".weight_scale",
"_weight_scale", ".input_scale", "_input_scale")
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping = FusedMoE.make_expert_params_mapping(
@@ -410,10 +415,11 @@ class Qwen3MoeModel(nn.Module):
if "mlp.experts" in name:
continue
name = name.replace(weight_name, param_name)
# Skip loading extra bias for GPTQ models.
if ((name.endswith(".bias") or name.endswith("_bias"))
and name not in params_dict):
# Skip loading extra parameters for GPTQ/modelopt models.
if name.endswith(ignore_suffixes) and name not in params_dict:
continue
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):
continue
@@ -433,9 +439,9 @@ class Qwen3MoeModel(nn.Module):
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):
continue
# Skip loading extra bias for GPTQ models.
if ((name.endswith(".bias") or name.endswith("_bias"))
and name not in params_dict):
# Skip loading extra parameters for GPTQ/modelopt models.
if name.endswith(
ignore_suffixes) and name not in params_dict:
continue
param = params_dict[name]
weight_loader = param.weight_loader
@@ -446,9 +452,9 @@ class Qwen3MoeModel(nn.Module):
expert_id=expert_id)
break
else:
# Skip loading extra bias for GPTQ models.
if ((name.endswith(".bias") or name.endswith("_bias"))
and name not in params_dict):
# Skip loading extra parameters for GPTQ/modelopt models.
if name.endswith(
ignore_suffixes) and name not in params_dict:
continue
# Skip layers on other devices.
if is_pp_missing_parameter(name, self):