[Misc] Fix expert_ids shape in MoE (#4517)

2026-03-16 16:07:15 +08:00 · 2024-05-01 16:47:59 -07:00 · 2024-05-01 16:47:59 -07:00 · 826b82a260
commit 826b82a260
parent c9d852d601
1 changed files with 6 additions and 5 deletions
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@ -203,14 +203,15 @@ def moe_align_block_size(
    - The padding ensures that the total number of tokens is now divisible
        by block_size for proper block matrix operations.
    """
-    sorted_ids = torch.empty(
-        (topk_ids.numel() + num_experts * (block_size - 1), ),
-        dtype=torch.int32,
-        device=topk_ids.device)
-    expert_ids = torch.empty((topk_ids.numel() + num_experts, ),
+    max_num_tokens_padded = topk_ids.numel() + num_experts * (block_size - 1)
+    sorted_ids = torch.empty((max_num_tokens_padded, ),
                             dtype=torch.int32,
                             device=topk_ids.device)
    sorted_ids.fill_(topk_ids.numel())
+    max_num_m_blocks = triton.cdiv(max_num_tokens_padded, block_size)
+    expert_ids = torch.empty((max_num_m_blocks, ),
+                             dtype=torch.int32,
+                             device=topk_ids.device)
    num_tokens_post_pad = torch.empty((1),
                                      dtype=torch.int32,
                                      device=topk_ids.device)