mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 12:16:03 +08:00
[moe] Use enable_chunking func (to support disabling chunking) (#29935)
Signed-off-by: Ming Yang <minos.future@gmail.com>
This commit is contained in:
parent
8da6ae49c3
commit
8372be2828
@ -743,7 +743,7 @@ class FusedMoEModularKernel(torch.nn.Module):
|
||||
1,
|
||||
(
|
||||
M
|
||||
if not self.fused_experts.supports_chunking()
|
||||
if not self.fused_experts.enable_chunking()
|
||||
else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE)
|
||||
),
|
||||
)
|
||||
@ -786,7 +786,7 @@ class FusedMoEModularKernel(torch.nn.Module):
|
||||
is_forward_context_available()
|
||||
and get_forward_context().attn_metadata is None
|
||||
)
|
||||
if is_profile_run and self.fused_experts.supports_chunking() and self.is_dp_ep:
|
||||
if is_profile_run and self.fused_experts.enable_chunking() and self.is_dp_ep:
|
||||
max_workspace_13, max_workspace_2, max_fused_out_shape = (
|
||||
self.fused_experts.workspace_shapes(
|
||||
envs.VLLM_FUSED_MOE_CHUNK_SIZE,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user