From 269c4db0a4f2635d0703f53f67decd4bb93b046b Mon Sep 17 00:00:00 2001 From: Varun Sundar Rabindranath Date: Fri, 24 Oct 2025 19:29:24 -0400 Subject: [PATCH] [Misc][DP] Guard mxfp4 implementation selection (#27484) Signed-off-by: Varun Sundar Rabindranath Co-authored-by: Varun Sundar Rabindranath --- vllm/model_executor/layers/quantization/mxfp4.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py index 96297c0c4d723..6823fa02a32d7 100644 --- a/vllm/model_executor/layers/quantization/mxfp4.py +++ b/vllm/model_executor/layers/quantization/mxfp4.py @@ -794,7 +794,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): ) else: raise NotImplementedError( - "Incompatible Mxfp4 backend for EP batched experts format" + f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for " + "EP batched experts format" ) else: assert self.moe_quant_config is not None @@ -813,8 +814,12 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs) elif self.mxfp4_backend == Mxfp4Backend.MARLIN: return MarlinExperts(self.moe_quant_config) - else: + elif self.mxfp4_backend == Mxfp4Backend.TRITON: return OAITritonExperts(self.moe_quant_config) + else: + raise NotImplementedError( + f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for EP" + ) def _route_and_experts( self,