diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index 2d9f5e52bd65a..eb8ffa37882cb 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -116,8 +116,9 @@ class AutoRoundConfig(QuantizationConfig): quantized = True if self.block_name_to_quantize: - quantized = any(name in layer_name - for name in self.block_name_to_quantize) + quantized = any( + layer_name.startswith(name) + for name in self.block_name_to_quantize) elif isinstance(layer, ParallelLMHead): quantized = False