Change warning logs to debug for unimplemented MXFP4 Linear/Attention (#29441)

Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
commit 7df0289782
parent 0abc79482a
Author: Michael Goin
Date:   2025-11-25 17:52:31 -05:00 (committed by GitHub)


@@ -196,9 +196,10 @@ class Mxfp4Config(QuantizationConfig):
             # TODO: Add support for MXFP4 Linear Method.
             # MXFP4 LinearMethod is available in AMD-Quark, refer to that implementation
             # if you are interested in enabling MXFP4 here.
-            logger.warning_once(
+            logger.debug_once(
                 "MXFP4 linear layer is not implemented - falling back to "
-                "UnquantizedLinearMethod."
+                "UnquantizedLinearMethod.",
+                scope="local",
             )
             return UnquantizedLinearMethod()
         elif isinstance(layer, FusedMoE):
@@ -208,9 +209,10 @@ class Mxfp4Config(QuantizationConfig):
             return Mxfp4MoEMethod(layer.moe_config)
         elif isinstance(layer, Attention):
             # TODO: Add support for MXFP4 Attention.
-            logger.warning_once(
+            logger.debug_once(
                 "MXFP4 attention layer is not implemented. "
-                "Skipping quantization for this layer."
+                "Skipping quantization for this layer.",
+                scope="local",
             )
             return None
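
For reference, the debug_once calls above follow the common "log once" pattern:
a given message is emitted at most once per process, so a layer type that is
instantiated hundreds of times does not flood the log. The following is a
minimal, self-contained sketch of that pattern, not vLLM's actual logger
implementation; the scope parameter semantics ("local" meaning dedup within
the current process) are an assumption drawn from the call sites in this diff.

    import logging

    logger = logging.getLogger("mxfp4.sketch")
    _seen: set[tuple[str, str]] = set()

    def debug_once(msg: str, scope: str = "local") -> None:
        """Emit msg at DEBUG level only the first time it is seen."""
        # Key on (scope, message) so the same text could still be logged
        # separately per scope if other scopes were added later.
        key = (scope, msg)
        if key in _seen:
            return
        _seen.add(key)
        logger.debug(msg)

    # Usage, mirroring the Linear fallback call site above:
    debug_once(
        "MXFP4 linear layer is not implemented - falling back to "
        "UnquantizedLinearMethod.",
        scope="local",
    )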