From f5dfa0753163530117b4766c4e79e8cb2dc7066e Mon Sep 17 00:00:00 2001
From: noiji <52301388+noiji@users.noreply.github.com>
Date: Mon, 30 Jun 2025 18:21:56 +0900
Subject: [PATCH] [Bugfix] Skip loading extra parameters for modelopt Qwen3
 MoE model (#19598)

Signed-off-by: noiji <>
---
 vllm/model_executor/models/qwen3_moe.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py
index 417d7b22088bf..90a28192eccbc 100644
--- a/vllm/model_executor/models/qwen3_moe.py
+++ b/vllm/model_executor/models/qwen3_moe.py
@@ -386,6 +386,11 @@ class Qwen3MoeModel(nn.Module):
             ("gate_up_proj", "up_proj", 1),
         ]
 
+        # Skip loading extra parameters for GPTQ/modelopt models.
+        ignore_suffixes = (".bias", "_bias", ".k_scale", "_k_scale",
+                           ".v_scale", "_v_scale", ".weight_scale",
+                           "_weight_scale", ".input_scale", "_input_scale")
+
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
         expert_params_mapping = FusedMoE.make_expert_params_mapping(
@@ -410,10 +415,11 @@ class Qwen3MoeModel(nn.Module):
                 if "mlp.experts" in name:
                     continue
                 name = name.replace(weight_name, param_name)
-                # Skip loading extra bias for GPTQ models.
-                if ((name.endswith(".bias") or name.endswith("_bias"))
-                        and name not in params_dict):
+
+                # Skip loading extra parameters for GPTQ/modelopt models.
+                if name.endswith(ignore_suffixes) and name not in params_dict:
                     continue
+
                 # Skip layers on other devices.
                 if is_pp_missing_parameter(name, self):
                     continue
@@ -433,9 +439,9 @@ class Qwen3MoeModel(nn.Module):
                     # Skip layers on other devices.
                     if is_pp_missing_parameter(name, self):
                         continue
-                    # Skip loading extra bias for GPTQ models.
-                    if ((name.endswith(".bias") or name.endswith("_bias"))
-                            and name not in params_dict):
+                    # Skip loading extra parameters for GPTQ/modelopt models.
+                    if name.endswith(
+                            ignore_suffixes) and name not in params_dict:
                         continue
                     param = params_dict[name]
                     weight_loader = param.weight_loader
@@ -446,9 +452,9 @@ class Qwen3MoeModel(nn.Module):
                         expert_id=expert_id)
                     break
                 else:
-                    # Skip loading extra bias for GPTQ models.
-                    if ((name.endswith(".bias") or name.endswith("_bias"))
-                            and name not in params_dict):
+                    # Skip loading extra parameters for GPTQ/modelopt models.
+                    if name.endswith(
+                            ignore_suffixes) and name not in params_dict:
                         continue
                     # Skip layers on other devices.
                     if is_pp_missing_parameter(name, self):
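
Note (editor's sketch, not part of the patch): the rewritten check relies on
Python's str.endswith accepting a tuple of suffixes, returning True if the
string ends with any member. A single call therefore subsumes the old
".bias"/"_bias" pair while also covering the modelopt k/v/weight/input scale
names. The parameter names below are hypothetical, chosen only to illustrate
the behavior:

    # Same tuple as the patch's ignore_suffixes.
    ignore_suffixes = (".bias", "_bias", ".k_scale", "_k_scale",
                       ".v_scale", "_v_scale", ".weight_scale",
                       "_weight_scale", ".input_scale", "_input_scale")

    # Stand-in for the model's registered parameters (illustrative name).
    params_dict = {"model.layers.0.self_attn.qkv_proj.weight": object()}

    checkpoint_names = [
        "model.layers.0.self_attn.qkv_proj.weight",      # real param: loaded
        "model.layers.0.self_attn.attn.k_scale",         # extra scale: skipped
        "model.layers.0.mlp.gate_up_proj.weight_scale",  # extra scale: skipped
    ]

    for name in checkpoint_names:
        # One endswith call over the tuple replaces the chained
        # name.endswith(".bias") or name.endswith("_bias") checks.
        if name.endswith(ignore_suffixes) and name not in params_dict:
            print("skipped:", name)
        else:
            print("loaded: ", name)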