From 7f21e8052b5f3948c8a59514a8dc1e9c5eef70d6 Mon Sep 17 00:00:00 2001
From: "rongfu.leng"
Date: Sat, 31 May 2025 01:34:22 +0800
Subject: [PATCH] [Misc] Support group_size = -1 in AWQ quantization (#18910)

Signed-off-by: rongfu.leng
---
 vllm/model_executor/layers/quantization/awq.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/awq.py b/vllm/model_executor/layers/quantization/awq.py
index 4660c28c8de4a..87afdb623d912 100644
--- a/vllm/model_executor/layers/quantization/awq.py
+++ b/vllm/model_executor/layers/quantization/awq.py
@@ -101,7 +101,13 @@ class AWQLinearMethod(LinearMethodBase):
                        output_partition_sizes: list[int], input_size: int,
                        output_size: int, params_dtype: torch.dtype,
                        **extra_weight_attrs):
-        if input_size_per_partition % self.quant_config.group_size != 0:
+        # Normalize group_size: -1 means one group spanning the full input dim.
+        if self.quant_config.group_size != -1:
+            group_size = self.quant_config.group_size
+        else:
+            group_size = input_size
+
+        if input_size_per_partition % group_size != 0:
             raise ValueError(
                 "The input size is not aligned with the quantized "
                 "weight shape. This can be caused by too large "
@@ -127,9 +133,11 @@ class AWQLinearMethod(LinearMethodBase):
             packed_factor=self.quant_config.pack_factor,
             weight_loader=weight_loader)
 
+        num_groups = input_size_per_partition // group_size
+
         qzeros = PackedvLLMParameter(
             data=torch.empty(
-                input_size_per_partition // self.quant_config.group_size,
+                num_groups,
                 output_size_per_partition // self.quant_config.pack_factor,
                 dtype=torch.int32,
             ),
@@ -140,7 +148,7 @@ class AWQLinearMethod(LinearMethodBase):
             weight_loader=weight_loader)
 
         scales = GroupQuantScaleParameter(data=torch.empty(
-            input_size_per_partition // self.quant_config.group_size,
+            num_groups,
             output_size_per_partition,
             dtype=params_dtype,
         ),
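
Before this change, group_size = -1 slipped past the alignment check (in
Python, x % -1 == 0 for any x) and then produced a negative first dimension
for qzeros and scales, since input_size_per_partition // -1 is negative. The
sketch below is a standalone illustration of the normalized shape logic, not
vLLM code: the helper name awq_aux_shapes and the pack_factor default of 8
(eight int4 values per int32 word) are assumptions for the example, not part
of the vLLM API.

def awq_aux_shapes(input_size: int,
                   input_size_per_partition: int,
                   output_size_per_partition: int,
                   group_size: int,
                   pack_factor: int = 8):  # one int32 word packs 8 int4 values
    # Mirror the patch: -1 normalizes to one group over the whole input dim.
    if group_size == -1:
        group_size = input_size
    # Same alignment check as create_weights(); note that group_size == -1
    # combined with tensor parallelism (input_size_per_partition < input_size)
    # still fails here, because a single group cannot be split across ranks.
    if input_size_per_partition % group_size != 0:
        raise ValueError("The input size is not aligned with the quantized "
                         "weight shape.")
    num_groups = input_size_per_partition // group_size
    qzeros_shape = (num_groups, output_size_per_partition // pack_factor)
    scales_shape = (num_groups, output_size_per_partition)
    return qzeros_shape, scales_shape

# Grouped quantization: 4096 / 128 = 32 groups.
print(awq_aux_shapes(4096, 4096, 4096, group_size=128))  # ((32, 512), (32, 4096))
# group_size = -1: a single group, so one row of qzeros and scales.
print(awq_aux_shapes(4096, 4096, 4096, group_size=-1))   # ((1, 512), (1, 4096))

Hoisting num_groups out of the two torch.empty calls also keeps the qzeros
and scales shapes computed from the same normalized value, rather than
repeating the division against the raw config field.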