From e31946f86eb7975bb1483cff04ea52ba9b5aa79c Mon Sep 17 00:00:00 2001
From: Xiaozhu Meng <mxz@meta.com>
Date: Wed, 5 Nov 2025 21:52:16 -0800
Subject: [PATCH] [flashinfer] fix FI all2all with FI cutlass moe (#28166)

Signed-off-by: Xiaozhu <mxz297@gmail.com>
---
 .../layers/fused_moe/flashinfer_cutlass_prepare_finalize.py   | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py
index 97ee20ae9a11..bc9aab5208d9 100644
--- a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py
+++ b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py
@@ -233,12 +233,13 @@ def flashinfer_alltoall_dispatch(
     max_num_token = (
         max(global_num_tokens_cpu) if global_num_tokens_cpu is not None else x.shape[0]
     )
+    orig_topk_weights_dtype = topk_weights.dtype
     alltoall_info, topk_ids, topk_weights, _ = (
         MnnvlMoe.mnnvl_moe_alltoallv_prepare_without_allgather(
             topk_ids,
             topk_weights,
             None,
-            all2all_manager.prepare_workspace,
+            all2all_manager.prepare_workspace_tensor,
             max_num_token,
             ep_rank,
             ep_size,
@@ -247,6 +248,7 @@ def flashinfer_alltoall_dispatch(
             top_k,
         )
     )
+    topk_weights = topk_weights.view(dtype=orig_topk_weights_dtype)
 
     x, x_sf = moe_kernel_quantize_input(
         x,