diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index ef33852e3162..143123e577b1 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -309,7 +309,9 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
                                 expert_map=expert_map,
                                 renormalize=renormalize)
 
-    forward_native = forward_cuda
+    # Chosen once, when the class body runs. is_tpu must be *called*: the
+    # bare bound method is always truthy and would pick forward_tpu everywhere.
+    forward_native = forward_tpu if current_platform.is_tpu() else forward_cuda
 
 
 def determine_expert_map(