From 87778d5f00ab09f88d0b677c34ae7d33e6296ce2 Mon Sep 17 00:00:00 2001
From: Heng Guo
Date: Tue, 21 Oct 2025 06:23:30 +0800
Subject: [PATCH] [Feature][Quantization] auto_round support for mixed bits quantization (#23812)

Signed-off-by: n1ck-guo
Signed-off-by: Heng Guo
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 vllm/model_executor/layers/quantization/auto_round.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py
index 0e4815be603e..f1943d461187 100644
--- a/vllm/model_executor/layers/quantization/auto_round.py
+++ b/vllm/model_executor/layers/quantization/auto_round.py
@@ -436,6 +436,12 @@ class AutoRoundConfig(QuantizationConfig):
         return None
 
     def get_quant_method(self, layer: torch.nn.Module, prefix: str):
+        if prefix and self.extra_config:
+            for layer_name in self.extra_config:
+                if (
+                    layer_name == prefix or layer_name == f"model.{prefix}"
+                ) and self.extra_config[layer_name].get("bits", 16) >= 16:
+                    return UnquantizedLinearMethod()
         if (
             current_platform.is_cpu()
             or current_platform.is_xpu()