[Bugfix] Lazy import gpt_oss_triton_kernels_moe for mxfp4 (#23678)

Signed-off-by: mgoin <mgoin64@gmail.com>
2026-03-16 15:17:15 +08:00 · 2025-08-26 21:34:57 -04:00 · 2025-08-26 21:34:57 -04:00 · de02b07db4
commit de02b07db4
parent eb1995167e
1 changed files with 2 additions and 2 deletions
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -10,8 +10,6 @@ from vllm.config import get_current_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig,
                                                  FusedMoEMethodBase)
-from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
-    triton_kernel_moe_forward)
 from vllm.model_executor.layers.linear import (LinearBase,
                                               UnquantizedLinearMethod)
 from vllm.model_executor.layers.quantization import QuantizationMethods
@@ -557,6 +555,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
            )[0]
            return trtllm_gen_output
        else:
+            from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (  # noqa: E501
+                triton_kernel_moe_forward)
            return triton_kernel_moe_forward(
                hidden_states=x,
                w1=self.w13_weight_triton_tensor,