From 6dec9f61098786690b4ca2140682dbafb849f8d9 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Mon, 10 Nov 2025 17:01:17 -0500
Subject: [PATCH] [BugFix] Fix DeepGEMM over-allocating workspace (#28254)

Signed-off-by: Lucas Wilkinson
---
 vllm/model_executor/layers/fused_moe/deep_gemm_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
index 484b8aa9d107c..86cdd25f2c873 100644
--- a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
@@ -215,7 +215,7 @@ class DeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):
         )
         assert M_sum % block_m == 0
 
-        workspace1 = (M_sum, max(N, K))
+        workspace1 = (M_sum, N)
         workspace2 = (M_sum, max(N // 2, K))
         output = (M, K)
         return (workspace1, workspace2, output)