From 7df0289782ab500b2713b7521979c28de2b21cac Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Tue, 25 Nov 2025 17:52:31 -0500
Subject: [PATCH] Change warning logs to debug for unimplemented MXFP4
 Linear/Attention (#29441)

Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
---
 vllm/model_executor/layers/quantization/mxfp4.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py
index 198feb03be3e4..d975131f7cff7 100644
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -196,9 +196,10 @@ class Mxfp4Config(QuantizationConfig):
             # TODO: Add support for MXFP4 Linear Method.
             # MXFP4 LinearMethod is available in AMD-Quark, refer to that implementation
             # if you are interested in enabling MXFP4 here.
-            logger.warning_once(
+            logger.debug_once(
                 "MXFP4 linear layer is not implemented - falling back to "
-                "UnquantizedLinearMethod."
+                "UnquantizedLinearMethod.",
+                scope="local",
             )
             return UnquantizedLinearMethod()
         elif isinstance(layer, FusedMoE):
@@ -208,9 +209,10 @@ class Mxfp4Config(QuantizationConfig):
                 return Mxfp4MoEMethod(layer.moe_config)
         elif isinstance(layer, Attention):
             # TODO: Add support for MXFP4 Attention.
-            logger.warning_once(
+            logger.debug_once(
                 "MXFP4 attention layer is not implemented. "
-                "Skipping quantization for this layer."
+                "Skipping quantization for this layer.",
+                scope="local",
             )
         return None