From 426cc8629f7e630e1c5a0b96fe2db737a170a06d Mon Sep 17 00:00:00 2001
From: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com>
Date: Tue, 2 Sep 2025 21:57:59 -0700
Subject: [PATCH] [BugFix] Fix routed_scaling_factor double mul for dots1 and
 glm4 MoE models (#24132)

Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
---
 vllm/model_executor/models/dots1.py    | 3 ++-
 vllm/model_executor/models/glm4_moe.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/dots1.py b/vllm/model_executor/models/dots1.py
index a5477af8694b4..4ddf906dddefe 100644
--- a/vllm/model_executor/models/dots1.py
+++ b/vllm/model_executor/models/dots1.py
@@ -137,7 +137,8 @@ class Dots1MoE(nn.Module):
             topk_group=config.topk_group,
             prefix=f"{prefix}.experts",
             scoring_func=config.scoring_func,
-            routed_scaling_factor=self.routed_scaling_factor,
+            # we do scaling outside, set factor to 1.0 to avoid double mul
+            routed_scaling_factor=1.0,
             e_score_correction_bias=self.gate.e_score_correction_bias)
 
         if config.n_shared_experts is not None:
diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py
index 06ed453ec29f9..284506b642d66 100644
--- a/vllm/model_executor/models/glm4_moe.py
+++ b/vllm/model_executor/models/glm4_moe.py
@@ -159,7 +159,8 @@ class Glm4MoE(nn.Module):
             topk_group=config.topk_group,
             prefix=f"{prefix}.experts",
             scoring_func="sigmoid",
-            routed_scaling_factor=self.routed_scaling_factor,
+            # we do scaling outside, set factor to 1.0 to avoid double mul
+            routed_scaling_factor=1.0,
             e_score_correction_bias=self.gate.e_score_correction_bias,
             enable_eplb=self.enable_eplb,
             num_redundant_experts=self.n_redundant_experts)