From cb0a7b4bea26657da989562a10055b7d0b59fd3a Mon Sep 17 00:00:00 2001 From: Max Hu Date: Wed, 19 Nov 2025 16:54:15 -0500 Subject: [PATCH] [Bugfix] Move flashinfer kernel check into ```__init__``` function of ```FusedMoE``` (#29018) Signed-off-by: Max Hu --- vllm/model_executor/layers/fused_moe/layer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 7b15e63e9e350..be1910266c878 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -574,6 +574,9 @@ class FusedMoE(CustomOp): is_act_and_mul=is_act_and_mul, is_lora_enabled=vllm_config.lora_config is not None, ) + self.moe_config_use_flashinfer_cutlass_kernels = ( + self.moe_config.use_flashinfer_cutlass_kernels + ) self.quant_config = quant_config @@ -728,7 +731,7 @@ class FusedMoE(CustomOp): return ( self.moe_quant_config is not None and self.moe_quant_config.quant_dtype == "nvfp4" - and self.moe_config.use_flashinfer_cutlass_kernels + and self.moe_config_use_flashinfer_cutlass_kernels ) @property