From f17c075884009a3bfb7c66cb0897719b8e18f196 Mon Sep 17 00:00:00 2001
From: Samit <285365963@qq.com>
Date: Sat, 13 Sep 2025 00:12:23 +0800
Subject: [PATCH] [Model] Switch to Fused RMSNorm in GLM-4.1V model (#24733)

Signed-off-by: SamitHuang <285365963@qq.com>
---
 vllm/model_executor/models/glm4_1v.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index 4ed07bd060cf..22386a5e819a 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -419,15 +419,16 @@ class Glm4vVisionBlock(nn.Module):
         max_seqlen: Optional[int] = None,  # Only used for Flash Attention
         seqlens: Optional[list[int]] = None,  # Only used for xFormers
     ) -> torch.Tensor:
-        x = x + self.attn(
+        x_attn = self.attn(
             self.norm1(x),
             cu_seqlens=cu_seqlens,
             rotary_pos_emb=rotary_pos_emb,
             max_seqlen=max_seqlen,
             seqlens=seqlens,
         )
+        x_fused_norm, residual = self.norm2(x, residual=x_attn)
+        x = residual + self.mlp(x_fused_norm)
 
-        x = x + self.mlp(self.norm2(x))
         return x
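
Reviewer note (illustrative, not part of the patch): the new wiring relies on
vLLM's RMSNorm layer supporting a fused add-and-normalize path, where calling
norm2(x, residual=r) is expected to return both the normalized value of x + r
and the updated residual stream x + r. Below is a minimal pure-PyTorch sketch
of that assumed contract, with an equivalence check against the old two-step
wiring; the helper names (rms_norm, fused_add_rms_norm) are hypothetical
stand-ins, not vLLM APIs.

import torch

def rms_norm(x: torch.Tensor, weight: torch.Tensor,
             eps: float = 1e-6) -> torch.Tensor:
    # Plain RMSNorm: scale by the reciprocal root-mean-square of the last dim.
    variance = x.pow(2).mean(dim=-1, keepdim=True)
    return x * torch.rsqrt(variance + eps) * weight

def fused_add_rms_norm(x, residual, weight, eps=1e-6):
    # Fused form: add the residual first, then normalize, and return the
    # normalized activations together with the updated residual stream.
    added = x + residual
    return rms_norm(added, weight, eps), added

# Equivalence of the old and new Glm4vVisionBlock wiring
# (mlp here is a toy stand-in for self.mlp):
hidden = torch.randn(4, 8)
attn_out = torch.randn(4, 8)
weight = torch.ones(8)
mlp = lambda t: 2.0 * t

old = (hidden + attn_out) + mlp(rms_norm(hidden + attn_out, weight))
normed, resid = fused_add_rms_norm(hidden, attn_out, weight)
new = resid + mlp(normed)
assert torch.allclose(old, new)

The motivation for the fused form, as the patch title suggests, is that the
residual add and the normalization can be performed in a single kernel rather
than as two separate passes over the activations.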