[Bugfix] Lazy import gpt_oss_triton_kernels_moe for mxfp4 (#23678)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin 2025-08-26 21:34:57 -04:00 committed by GitHub
parent eb1995167e
commit de02b07db4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -10,8 +10,6 @@ from vllm.config import get_current_vllm_config
from vllm.logger import init_logger
from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig,
FusedMoEMethodBase)
from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
triton_kernel_moe_forward)
from vllm.model_executor.layers.linear import (LinearBase,
UnquantizedLinearMethod)
from vllm.model_executor.layers.quantization import QuantizationMethods
@@ -557,6 +555,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
)[0]
return trtllm_gen_output
else:
from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import ( # noqa: E501
triton_kernel_moe_forward)
return triton_kernel_moe_forward(
hidden_states=x,
w1=self.w13_weight_triton_tensor,