From 60279d272e3b39052c58fff5316222f6ca249372 Mon Sep 17 00:00:00 2001 From: Yongye Zhu Date: Fri, 19 Dec 2025 22:16:28 +0000 Subject: [PATCH] change top level interface to mk --- .../layers/fused_moe/unquantized_fused_moe_method.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py b/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py index 82dbccf3fa9da..781345804ec82 100644 --- a/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py +++ b/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py @@ -31,6 +31,10 @@ from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.platforms.interface import CpuArchEnum from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe +import vllm.model_executor.layers.fused_moe.modular_kernel as mk +from vllm.model_executor.layers.fused_moe.prepare_finalize import ( + MoEPrepareAndFinalizeNoEP, +) if current_platform.is_cuda_alike(): from .fused_batched_moe import BatchedTritonExperts