From d3c18c9cb0b6c42eab4ed7251adbf68dde4da39a Mon Sep 17 00:00:00 2001 From: Yuxuan Zhang <2448370773@qq.com> Date: Mon, 4 Aug 2025 00:04:54 +0800 Subject: [PATCH] fuse fp32 for GLM-4.5 e_score_correction_bias (#22143) Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com> --- vllm/model_executor/models/glm4_moe.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py index 6a196fef572d..c702684c6caa 100644 --- a/vllm/model_executor/models/glm4_moe.py +++ b/vllm/model_executor/models/glm4_moe.py @@ -125,9 +125,8 @@ class Glm4MoE(nn.Module): quant_config=None, prefix=f"{prefix}.gate") - # noaux_tc is not set in transformers new config now - self.gate.e_score_correction_bias = (nn.Parameter( - torch.empty(config.n_routed_experts))) + self.gate.e_score_correction_bias = nn.Parameter( + torch.empty(config.n_routed_experts, dtype=torch.float32)) # Load balancing settings. vllm_config = get_current_vllm_config()