From 825fdb11add30237e7f592f1a132d3913cd632ec Mon Sep 17 00:00:00 2001
From: "Li, Jiang"
Date: Fri, 19 Sep 2025 15:41:12 +0800
Subject: [PATCH] [Bugfix][CPU] Add placeholder to avoid import errors when
 using fused_moe ops on platforms without triton (#25137)

Signed-off-by: jiang1.li
---
 vllm/model_executor/layers/fused_moe/__init__.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/vllm/model_executor/layers/fused_moe/__init__.py b/vllm/model_executor/layers/fused_moe/__init__.py
index 6730f051e3d71..75f56cd01a4ea 100644
--- a/vllm/model_executor/layers/fused_moe/__init__.py
+++ b/vllm/model_executor/layers/fused_moe/__init__.py
@@ -78,3 +78,12 @@ if HAS_TRITON:
         "TritonOrDeepGemmExperts",
         "BatchedTritonOrDeepGemmExperts",
     ]
+else:
+    # Some model classes directly use the custom ops. Add placeholders
+    # to avoid import errors.
+    def _raise_exception(method: str):
+        raise NotImplementedError(
+            f"{method} is not implemented due to the lack of Triton.")
+
+    fused_topk = lambda *args, **kwargs: _raise_exception("fused_topk")
+    fused_experts = lambda *args, **kwargs: _raise_exception("fused_experts")
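
Illustration (not part of the commit): a minimal, self-contained sketch of the
placeholder pattern the patch adds. The point is that the module imports
cleanly when Triton is absent, and the NotImplementedError only surfaces when
a placeholder op is actually called. HAS_TRITON is hard-coded here for
demonstration; in vLLM it is determined at import time.

    # Standalone sketch of the placeholder pattern (illustration only).
    HAS_TRITON = False  # e.g. a CPU-only platform without triton installed

    if HAS_TRITON:
        pass  # the real Triton-backed kernels would be imported here
    else:
        # Placeholders keep `from ... import fused_topk` working; the
        # failure is deferred from import time to call time.
        def _raise_exception(method: str):
            raise NotImplementedError(
                f"{method} is not implemented due to the lack of Triton.")

        fused_topk = lambda *args, **kwargs: _raise_exception("fused_topk")

    # Importing succeeds; invoking the op raises a descriptive error:
    try:
        fused_topk()
    except NotImplementedError as e:
        print(e)  # fused_topk is not implemented due to the lack of Triton.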