From 426cc8629f7e630e1c5a0b96fe2db737a170a06d Mon Sep 17 00:00:00 2001 From: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com> Date: Tue, 2 Sep 2025 21:57:59 -0700 Subject: [PATCH] [BugFix] Fix routed_scaling_factor double mul for dots1 and glm4 MoE models (#24132) Signed-off-by: Yong Hoon Shin --- vllm/model_executor/models/dots1.py | 3 ++- vllm/model_executor/models/glm4_moe.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/dots1.py b/vllm/model_executor/models/dots1.py index a5477af8694b4..4ddf906dddefe 100644 --- a/vllm/model_executor/models/dots1.py +++ b/vllm/model_executor/models/dots1.py @@ -137,7 +137,8 @@ class Dots1MoE(nn.Module): topk_group=config.topk_group, prefix=f"{prefix}.experts", scoring_func=config.scoring_func, - routed_scaling_factor=self.routed_scaling_factor, + # we do scaling outside, set factor to 1.0 to avoid double mul + routed_scaling_factor=1.0, e_score_correction_bias=self.gate.e_score_correction_bias) if config.n_shared_experts is not None: diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py index 06ed453ec29f9..284506b642d66 100644 --- a/vllm/model_executor/models/glm4_moe.py +++ b/vllm/model_executor/models/glm4_moe.py @@ -159,7 +159,8 @@ class Glm4MoE(nn.Module): topk_group=config.topk_group, prefix=f"{prefix}.experts", scoring_func="sigmoid", - routed_scaling_factor=self.routed_scaling_factor, + # we do scaling outside, set factor to 1.0 to avoid double mul + routed_scaling_factor=1.0, e_score_correction_bias=self.gate.e_score_correction_bias, enable_eplb=self.enable_eplb, num_redundant_experts=self.n_redundant_experts)