[BugFix] Workspace allocation during profile run : DeepEPHighThroughput + DeepGEMM (#30899)

2026-03-16 11:57:14 +08:00 · 2025-12-17 18:00:59 -05:00 · 2025-12-17 18:00:59 -05:00 · e3fc374a9a
commit e3fc374a9a
parent e06d0bf0aa
1 changed file with 4 additions and 1 deletion
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -795,7 +795,10 @@ class FusedMoEModularKernel(torch.nn.Module):
                    top_k,
                    global_num_experts,
                    local_num_experts,
-                    expert_tokens_meta,
+                    # expert_tokens_meta helps in allocating the optimal/minimal
+                    # amount of workspace. Pass None here so that we allocate
+                    # for the worst-case scenario.
+                    expert_tokens_meta=None,
                )
            )