From 0310029a2fc62171fae87155150326125e082a5a Mon Sep 17 00:00:00 2001 From: Alexander Matveev <59768536+alexm-neuralmagic@users.noreply.github.com> Date: Thu, 25 Jul 2024 01:34:11 -0400 Subject: [PATCH] [Bugfix] Fix awq_marlin and gptq_marlin flags (#6745) --- vllm/model_executor/layers/quantization/awq_marlin.py | 5 +++-- vllm/model_executor/layers/quantization/gptq_marlin.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/awq_marlin.py b/vllm/model_executor/layers/quantization/awq_marlin.py index 092f87b623e7f..5ffbb8e854e87 100644 --- a/vllm/model_executor/layers/quantization/awq_marlin.py +++ b/vllm/model_executor/layers/quantization/awq_marlin.py @@ -25,7 +25,7 @@ class AWQMarlinConfig(QuantizationConfig): def __init__(self, weight_bits: int, group_size: int, has_zp: bool, lm_head_quantized: bool) -> None: self.weight_bits = weight_bits - self.pack_factor = 32 // self.weight_bits # packed into int32 + self.pack_factor = 32 // self.weight_bits # packed into 32bits self.group_size = group_size self.has_zp = has_zp self.lm_head_quantized = lm_head_quantized @@ -69,7 +69,8 @@ class AWQMarlinConfig(QuantizationConfig): def override_quantization_method(cls, hf_quant_cfg, user_quant) -> Optional[str]: can_convert = cls.is_awq_marlin_compatible(hf_quant_cfg) - is_valid_user_quant = (user_quant is None or user_quant == "marlin") + is_valid_user_quant = (user_quant is None or user_quant == "marlin" + or user_quant == "awq_marlin") if can_convert and is_valid_user_quant: msg = ("The model is convertible to {} during runtime." diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py index 5b4d614ae2e74..bdcc9c3b4f0c5 100644 --- a/vllm/model_executor/layers/quantization/gptq_marlin.py +++ b/vllm/model_executor/layers/quantization/gptq_marlin.py @@ -79,7 +79,8 @@ class GPTQMarlinConfig(QuantizationConfig): user_quant) -> Optional[str]: can_convert = cls.is_gptq_marlin_compatible(hf_quant_cfg) - is_valid_user_quant = (user_quant is None or user_quant == "marlin") + is_valid_user_quant = (user_quant is None or user_quant == "marlin" + or user_quant == "gptq_marlin") if can_convert and is_valid_user_quant: msg = ("The model is convertible to {} during runtime."