From 7e4e709b43b6b35011b78247f8b0afde7e41961b Mon Sep 17 00:00:00 2001
From: Alexander Matveev <59768536+alexm-redhat@users.noreply.github.com>
Date: Tue, 1 Apr 2025 01:58:07 -0400
Subject: [PATCH] [V1] TPU - Fix fused MOE (#15834)

Signed-off-by: Alexander Matveev
---
 vllm/model_executor/layers/fused_moe/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index ef33852e3162..143123e577b1 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -309,7 +309,7 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
             expert_map=expert_map,
             renormalize=renormalize)
 
-    forward_native = forward_cuda
+    forward_native = forward_tpu if current_platform.is_tpu() else forward_cuda
 
 
 def determine_expert_map(
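
Why the parenthesized call matters: forward_native is assigned once, when the
class body of UnquantizedFusedMoEMethod executes, so the platform check has to
be an actual call. A bare current_platform.is_tpu is a bound method object and
is always truthy, which would route every platform to forward_tpu. Below is a
minimal, self-contained sketch of this class-attribute dispatch pattern; the
Platform class here is a hypothetical stand-in for vLLM's current_platform,
and the real CustomOp base class involves more dispatch machinery than shown.

# Sketch of class-attribute dispatch, assuming a stand-in Platform object.

class Platform:
    def __init__(self, device: str) -> None:
        self.device = device

    def is_tpu(self) -> bool:
        # This is a method: referencing it without parentheses yields a
        # bound method (always truthy), hence the is_tpu() call above.
        return self.device == "tpu"


# Assumption for the sketch: the platform is fixed once at import time.
current_platform = Platform("tpu")


class UnquantizedFusedMoEMethod:
    def forward_cuda(self, x):
        return f"cuda kernel on {x}"

    def forward_tpu(self, x):
        return f"tpu kernel on {x}"

    # Evaluated exactly once, while the class body executes, so the
    # condition must be a real call rather than a bare attribute lookup.
    forward_native = forward_tpu if current_platform.is_tpu() else forward_cuda


moe = UnquantizedFusedMoEMethod()
print(moe.forward_native("activations"))  # -> tpu kernel on activations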