[Bugfix] Move flashinfer kernel check into `__init__` function of `FusedMoE` (#29018)

Signed-off-by: Max Hu <hyoung2991@gmail.com>
2026-03-16 13:47:18 +08:00 · 2025-11-19 16:54:15 -05:00 · 2025-11-19 16:54:15 -05:00 · cb0a7b4bea
commit cb0a7b4bea
parent 8f4f77a727
1 changed file with 4 additions and 1 deletion
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -574,6 +574,9 @@ class FusedMoE(CustomOp):
            is_act_and_mul=is_act_and_mul,
            is_lora_enabled=vllm_config.lora_config is not None,
        )
+        self.moe_config_use_flashinfer_cutlass_kernels = (
+            self.moe_config.use_flashinfer_cutlass_kernels
+        )

        self.quant_config = quant_config

@@ -728,7 +731,7 @@ class FusedMoE(CustomOp):
        return (
            self.moe_quant_config is not None
            and self.moe_quant_config.quant_dtype == "nvfp4"
-            and self.moe_config.use_flashinfer_cutlass_kernels
+            and self.moe_config_use_flashinfer_cutlass_kernels
        )

    @property