mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-25 05:24:25 +08:00
[BugFix] Workspace allocation during profile run : DeepEPHighThroughput + DeepGEMM (#30899)
This commit is contained in:
parent
e06d0bf0aa
commit
e3fc374a9a
@@ -795,7 +795,10 @@ class FusedMoEModularKernel(torch.nn.Module):
|
|||||||
top_k,
|
top_k,
|
||||||
global_num_experts,
|
global_num_experts,
|
||||||
local_num_experts,
|
local_num_experts,
|
||||||
expert_tokens_meta,
|
# expert_tokens_meta helps in allocating optimal/minimal
|
||||||
|
# amount of workspace. Mark it None, so we allocate for
|
||||||
|
# the worst-case scenario.
|
||||||
|
expert_tokens_meta=None,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user