mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-19 22:17:28 +08:00
Optimize memory.
The tensor workspace13 is used only with one shape.

Signed-off-by: Andrey Khalyavin <halyavin@yandex-team.ru>
This commit is contained in:
parent
676db55eec
commit
a4af2e7b3a
@@ -312,7 +312,7 @@ class BatchedDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):
|
||||
num_dispatchers = self.num_dispatchers
|
||||
num_experts = local_num_experts
|
||||
max_num_tokens = M if self.max_num_tokens is None else self.max_num_tokens
|
||||
-workspace13 = (num_experts, max_num_tokens * num_dispatchers, max(K, N))
|
||||
+workspace13 = (num_experts, max_num_tokens * num_dispatchers, N)
|
||||
workspace2 = (num_experts, max_num_tokens * num_dispatchers, (N // 2))
|
||||
output = (num_experts, max_num_tokens * num_dispatchers, K)
|
||||
return (workspace13, workspace2, output)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user