[BugFix] Workspace allocation during profile run : DeepEPHighThroughput + DeepGEMM (#30899)

(cherry picked from commit e3fc374a9a69dddb16885d810f1e28d3fdd39ebd)
2026-07-08 13:57:15 +08:00 · 2025-12-17 18:00:59 -05:00 · 2025-12-17 18:00:59 -05:00 · 17f3988094
commit 17f3988094
parent 682c38583c
1 changed file with 4 additions and 1 deletion
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -795,7 +795,10 @@ class FusedMoEModularKernel(torch.nn.Module):
                    top_k,
                    global_num_experts,
                    local_num_experts,
-                    expert_tokens_meta,
+                    # expert_tokens_meta helps allocate the optimal/minimal
+                    # amount of workspace. Pass None here so that we allocate
+                    # for the worst-case scenario.
+                    expert_tokens_meta=None,
                )
            )