From 9e7e5baaa83b1e5070a3cf3823c134b28eaa2a1c Mon Sep 17 00:00:00 2001
From: Yuxuan Zhang <2448370773@qq.com>
Date: Wed, 13 Aug 2025 16:23:33 +0800
Subject: [PATCH] [Model] Add missing prefix to glm4_1v (#22716)

Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
---
 vllm/model_executor/models/glm4_1v.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index 2a89c03bfe7e..88c53c836327 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -453,25 +453,30 @@ class Glm4vPatchMerger(nn.Module):
         context_dim: int,
         quant_config: Optional[QuantizationConfig] = None,
         bias: bool = False,
+        prefix: str = "",
     ) -> None:
         super().__init__()
         self.hidden_size = d_model
         self.proj = ColumnParallelLinear(self.hidden_size,
                                          self.hidden_size,
                                          bias=bias,
-                                         gather_output=True)
+                                         gather_output=True,
+                                         quant_config=quant_config,
+                                         prefix=f"{prefix}.proj")
         self.post_projection_norm = nn.LayerNorm(self.hidden_size)
         self.gate_up_proj = MergedColumnParallelLinear(
             input_size=self.hidden_size,
             output_sizes=[context_dim] * 2,
             bias=bias,
             quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj",
         )
         self.down_proj = RowParallelLinear(
             context_dim,
             self.hidden_size,
             bias=bias,
             quant_config=quant_config,
+            prefix=f"{prefix}.down_proj",
         )
         self.act_fn = SiluAndMul()
         self.extra_activation_func = nn.GELU()
@@ -661,6 +666,7 @@ class Glm4vVisionTransformer(nn.Module):
             context_dim=vision_config.intermediate_size,
             quant_config=quant_config,
             bias=False,
+            prefix=f"{prefix}.merger",
         )
 
         self.embeddings = Glm4vVisionEmbeddings(vision_config)
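
Note on why the change matters: in vLLM, quantization configs typically decide per-module behavior by matching a module's fully qualified dotted name, which each parallel linear layer assembles from the `prefix` it is given (e.g. `f"{prefix}.proj"` above). Before this patch, `Glm4vPatchMerger` built its sublayers without forwarding `prefix` (and, for `self.proj`, without `quant_config`), so those layers could not be matched against the checkpoint's per-layer quantization metadata. The following is a minimal sketch of that mechanism, not vLLM's actual API; `ToyQuantConfig`, `ToyLinear`, and the layer name `"visual.merger.proj"` are hypothetical stand-ins:

```python
from typing import Optional


class ToyQuantConfig:
    """Hypothetical stand-in for vLLM's QuantizationConfig."""

    def __init__(self, ignored_layers: list[str]) -> None:
        self.ignored_layers = ignored_layers

    def is_quantized(self, prefix: str) -> bool:
        # Per-layer quantization metadata keys on the dotted module
        # path; a layer constructed without its prefix is looked up
        # under the wrong (empty) name.
        return prefix not in self.ignored_layers


class ToyLinear:
    """Hypothetical stand-in for ColumnParallelLinear and friends."""

    def __init__(self,
                 quant_config: Optional[ToyQuantConfig] = None,
                 prefix: str = "") -> None:
        self.prefix = prefix
        self.quantized = (quant_config is not None
                          and quant_config.is_quantized(prefix))


quant_config = ToyQuantConfig(ignored_layers=["visual.merger.proj"])

# Before the patch: neither quant_config nor prefix was forwarded, so
# the layer cannot consult the config and registers under "".
before = ToyLinear()

# After the patch: the dotted name lines up with the config's metadata,
# so the "ignore this layer" rule is actually applied.
after = ToyLinear(quant_config=quant_config, prefix="visual.merger.proj")

assert before.prefix == "" and not before.quantized
assert after.prefix == "visual.merger.proj" and not after.quantized
```

Under these assumptions, the patch's mechanical rule is simply that every constructed sublayer receives `quant_config` plus `prefix` extended with its own attribute name, mirroring how `Glm4vVisionTransformer` passes `f"{prefix}.merger"` down to the merger.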