mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 17:46:23 +08:00
[Bugfix] Lazy import gpt_oss_triton_kernels_moe for mxfp4 (#23678)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
eb1995167e
commit
de02b07db4
@@ -10,8 +10,6 @@ from vllm.config import get_current_vllm_config
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig,
|
||||
FusedMoEMethodBase)
|
||||
from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
|
||||
triton_kernel_moe_forward)
|
||||
from vllm.model_executor.layers.linear import (LinearBase,
|
||||
UnquantizedLinearMethod)
|
||||
from vllm.model_executor.layers.quantization import QuantizationMethods
|
||||
@@ -557,6 +555,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
||||
)[0]
|
||||
return trtllm_gen_output
|
||||
else:
|
||||
from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import ( # noqa: E501
|
||||
triton_kernel_moe_forward)
|
||||
return triton_kernel_moe_forward(
|
||||
hidden_states=x,
|
||||
w1=self.w13_weight_triton_tensor,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user