diff --git a/vllm/model_executor/layers/quantization/awq_marlin.py b/vllm/model_executor/layers/quantization/awq_marlin.py index 07d928b597ba..f7c885c2baa3 100644 --- a/vllm/model_executor/layers/quantization/awq_marlin.py +++ b/vllm/model_executor/layers/quantization/awq_marlin.py @@ -140,7 +140,7 @@ class AWQMarlinConfig(QuantizationConfig): from vllm.model_executor.layers.quantization.moe_wna16 import ( MoeWNA16Config) if not check_moe_marlin_supports_layer(layer, self.group_size): - logger.warning_one( + logger.warning_once( f"Layer '{prefix}' is not supported by AWQMoeMarlin. " "Falling back to Moe WNA16 kernels.") return MoeWNA16Config.from_config( diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py index c7f9d95f4c2d..703d54b3bee6 100644 --- a/vllm/model_executor/layers/quantization/gptq_marlin.py +++ b/vllm/model_executor/layers/quantization/gptq_marlin.py @@ -157,7 +157,7 @@ class GPTQMarlinConfig(QuantizationConfig): from vllm.model_executor.layers.quantization.moe_wna16 import ( MoeWNA16Config) if not check_moe_marlin_supports_layer(layer, self.group_size): - logger.warning_one( + logger.warning_once( f"Layer '{prefix}' is not supported by GPTQMoeMarlin. " "Falling back to Moe WNA16 kernels.") return MoeWNA16Config.from_config(