mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-26 12:36:32 +08:00
[BugFix] Workspace allocation during profile run : DeepEPHighThroughput + DeepGEMM (#30899)
This commit is contained in:
parent
e06d0bf0aa
commit
e3fc374a9a
@@ -795,7 +795,10 @@ class FusedMoEModularKernel(torch.nn.Module):
|
||||
top_k,
|
||||
global_num_experts,
|
||||
local_num_experts,
|
||||
expert_tokens_meta,
|
||||
# expert_tokens_meta helps in allocating optimal/minimal
|
||||
# amount of workspace. Mark it None, so we allocate for
|
||||
# the worst-case scenario.
|
||||
expert_tokens_meta=None,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user