From e3fc374a9a69dddb16885d810f1e28d3fdd39ebd Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath
Date: Wed, 17 Dec 2025 18:00:59 -0500
Subject: [PATCH] [BugFix] Workspace allocation during profile run :
 DeepEPHighThroughput + DeepGEMM (#30899)

---
 vllm/model_executor/layers/fused_moe/modular_kernel.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py
index 484314091cb15..b0834e861338f 100644
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -795,7 +795,10 @@ class FusedMoEModularKernel(torch.nn.Module):
                 top_k,
                 global_num_experts,
                 local_num_experts,
-                expert_tokens_meta,
+                # expert_tokens_meta helps in allocating optimal/minimal
+                # amount of workspace. Mark it None, so we allocate for
+                # the worst-case scenario.
+                expert_tokens_meta=None,
             )
         )