mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-29 08:57:13 +08:00
[BugFix] Fix DeepGEMM over-allocating workspace (#28254)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
parent
bf6a3d0ff5
commit
6dec9f6109
@@ -215,7 +215,7 @@ class DeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):
|
||||
)
|
||||
assert M_sum % block_m == 0
|
||||
|
||||
-workspace1 = (M_sum, max(N, K))
|
||||
+workspace1 = (M_sum, N)
|
||||
workspace2 = (M_sum, max(N // 2, K))
|
||||
output = (M, K)
|
||||
return (workspace1, workspace2, output)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user