diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py index 96297c0c4d723..6823fa02a32d7 100644 --- a/vllm/model_executor/layers/quantization/mxfp4.py +++ b/vllm/model_executor/layers/quantization/mxfp4.py @@ -794,7 +794,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): ) else: raise NotImplementedError( - "Incompatible Mxfp4 backend for EP batched experts format" + f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for " + "EP batched experts format" ) else: assert self.moe_quant_config is not None @@ -813,8 +814,12 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs) elif self.mxfp4_backend == Mxfp4Backend.MARLIN: return MarlinExperts(self.moe_quant_config) - else: + elif self.mxfp4_backend == Mxfp4Backend.TRITON: return OAITritonExperts(self.moe_quant_config) + else: + raise NotImplementedError( + f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for EP" + ) def _route_and_experts( self,