From 930a24144c073a08cfecabd75a242e713bc4f57e Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Tue, 2 Sep 2025 18:22:30 -0400 Subject: [PATCH] [Bug] R1 Accuracy: Fix `routed_scaling_factor` Double Mul Issue (#24119) Signed-off-by: yewentao256 --- vllm/model_executor/models/deepseek_v2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 36c9427e474e9..3a8eaf681733d 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -160,7 +160,8 @@ class DeepseekV2MoE(nn.Module): topk_group=config.topk_group, prefix=f"{prefix}.experts", scoring_func=config.scoring_func, - routed_scaling_factor=self.routed_scaling_factor, + # we do scaling outside, set factor to 1.0 to avoid double mul + routed_scaling_factor=1.0, e_score_correction_bias=self.gate.e_score_correction_bias, enable_eplb=self.enable_eplb, num_redundant_experts=self.n_redundant_experts)