fuse fp32 for GLM-4.5 e_score_correction_bias (#22143)

Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
This commit is contained in:
Yuxuan Zhang 2025-08-04 00:04:54 +08:00 committed by GitHub
parent 83f7bbb318
commit d3c18c9cb0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -125,9 +125,8 @@ class Glm4MoE(nn.Module):
                 quant_config=None,
                 prefix=f"{prefix}.gate")
-            # noaux_tc is not set in transformers new config now
-            self.gate.e_score_correction_bias = (nn.Parameter(
-                torch.empty(config.n_routed_experts)))
+            self.gate.e_score_correction_bias = nn.Parameter(
+                torch.empty(config.n_routed_experts, dtype=torch.float32))
             # Load balancing settings.
             vllm_config = get_current_vllm_config()