From 2f2fcb31b81f6025a2cc3cb9fe5b95bd03a5861b Mon Sep 17 00:00:00 2001
From: Yuxuan Zhang <2448370773@qq.com>
Date: Fri, 4 Jul 2025 05:41:13 +0800
Subject: [PATCH] [Misc] Remove _maybe_ignore_quant_config from GLM4.1v
 (#20432)

Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
---
 vllm/model_executor/models/glm4_1v.py | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index 303cbdb25945b..298700e23e507 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -55,9 +55,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
                                                QKVParallelLinear,
                                                RowParallelLinear)
 from vllm.model_executor.layers.quantization import QuantizationConfig
-from vllm.model_executor.layers.quantization.gptq import GPTQConfig
-from vllm.model_executor.layers.quantization.gptq_marlin import (
-    GPTQMarlinConfig)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.module_mapping import MultiModelKeys
 from vllm.multimodal import MULTIMODAL_REGISTRY
@@ -179,6 +176,7 @@ class Glm4vVisionMLP(nn.Module):
         hidden_features: int,
         bias: bool = False,
         quant_config: Optional[QuantizationConfig] = None,
+        prefix: str = "",
     ):
         super().__init__()
         self.gate_up_proj = MergedColumnParallelLinear(
@@ -186,13 +184,12 @@ class Glm4vVisionMLP(nn.Module):
             output_sizes=[hidden_features] * 2,
             bias=bias,
             quant_config=quant_config,
-        )
-        self.down_proj = RowParallelLinear(
-            hidden_features,
-            in_features,
-            bias=bias,
-            quant_config=quant_config,
-        )
+            prefix=f"{prefix}.gate_up_proj")
+        self.down_proj = RowParallelLinear(hidden_features,
+                                           in_features,
+                                           bias=bias,
+                                           quant_config=quant_config,
+                                           prefix=f"{prefix}.down_proj")
         self.act_fn = SiluAndMul()
 
     def forward(self, x: torch.Tensor):
@@ -407,6 +404,7 @@ class Glm4vVisionBlock(nn.Module):
             mlp_hidden_dim,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.mlp",
         )
 
     def forward(
@@ -1278,7 +1276,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
         self.visual = Glm4vVisionTransformer(
             config.vision_config,
             norm_eps=getattr(config, "rms_norm_eps", 1e-5),
-            quant_config=self._maybe_ignore_quant_config(quant_config),
+            quant_config=quant_config,
             prefix=maybe_prefix(prefix, "visual"),
         )
 
@@ -1291,13 +1289,6 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
         self.make_empty_intermediate_tensors = (
             self.language_model.make_empty_intermediate_tensors)
 
-    def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
-        # GPTQ configs do not have a list of ignored modules, however AutoGPTQ
-        # seems to avoid vision encoder sections for some models.
-        if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
-            return None
-        return quant_config
-
     def _validate_and_reshape_mm_tensor(self, mm_input: object,
                                         name: str) -> torch.Tensor:
         if not isinstance(mm_input, (torch.Tensor, list)):
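
Note (not part of the commit): the removed helper existed because GPTQ-style
configs historically exposed no per-module ignore list, so the vision
encoder's quant_config was dropped wholesale. With prefix now threaded
through the vision MLP's linear layers, a quantization config can exclude
modules by their dotted name instead. Below is a minimal, self-contained
sketch of that prefix-matching pattern; QuantConfigSketch, should_quantize,
and pick_linear_impl are hypothetical names for illustration, not vLLM's API.

# Illustrative sketch only -- not vLLM's actual code. It shows the general
# pattern this patch enables: once every vision-tower layer receives a full
# dotted prefix (e.g. "visual.blocks.0.mlp.gate_up_proj"), a quantization
# config can skip specific modules by name instead of being discarded for
# the entire vision encoder.

from typing import Optional


class QuantConfigSketch:
    """Hypothetical quant config carrying a per-module ignore list."""

    def __init__(self, ignored_prefixes: list[str]):
        self.ignored_prefixes = ignored_prefixes

    def should_quantize(self, prefix: str) -> bool:
        # A module is skipped when its dotted name equals, or nests under,
        # any ignored prefix -- the usual prefix-based exclusion semantics.
        return not any(
            prefix == p or prefix.startswith(p + ".")
            for p in self.ignored_prefixes)


def pick_linear_impl(prefix: str,
                     quant_config: Optional[QuantConfigSketch]) -> str:
    # Before this patch, GLM-4.1V passed quant_config=None for the whole
    # vision tower under GPTQ; with prefixes, the choice is per layer.
    if quant_config is None or not quant_config.should_quantize(prefix):
        return "unquantized linear"
    return "quantized linear"


cfg = QuantConfigSketch(ignored_prefixes=["visual"])
print(pick_linear_impl("visual.blocks.0.mlp.gate_up_proj", cfg))  # unquantized linear
print(pick_linear_impl("model.layers.0.mlp.gate_up_proj", cfg))   # quantized linear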