mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-07 23:57:15 +08:00
[BugFix] Workspace allocation during profile run : DeepEPHighThroughput + DeepGEMM (#30899)
(cherry picked from commit e3fc374a9a69dddb16885d810f1e28d3fdd39ebd)
This commit is contained in:
parent
682c38583c
commit
17f3988094
@@ -795,7 +795,10 @@ class FusedMoEModularKernel(torch.nn.Module):
|
||||
top_k,
|
||||
global_num_experts,
|
||||
local_num_experts,
|
||||
expert_tokens_meta,
|
||||
# expert_tokens_meta helps in allocating optimal/minimal
|
||||
# amount of workspace. Mark it None, so we allocate for
|
||||
# the worst-case scenario.
|
||||
expert_tokens_meta=None,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user