fuse fp32 for GLM-4.5 e_score_correction_bias (#22143)

Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
This commit is contained in:
Yuxuan Zhang 2025-08-04 00:04:54 +08:00 committed by GitHub
parent 83f7bbb318
commit d3c18c9cb0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -125,9 +125,8 @@ class Glm4MoE(nn.Module):
                 quant_config=None,
                 prefix=f"{prefix}.gate")
-            # noaux_tc is not set in transformers new config now
-            self.gate.e_score_correction_bias = (nn.Parameter(
-                torch.empty(config.n_routed_experts)))
+            self.gate.e_score_correction_bias = nn.Parameter(
+                torch.empty(config.n_routed_experts, dtype=torch.float32))
             # Load balancing settings.
             vllm_config = get_current_vllm_config()