[moe] Use enable_chunking func (to support disabling chunking) (#29935)

Signed-off-by: Ming Yang <minos.future@gmail.com>
Ming Yang 2025-12-18 01:02:38 -08:00 committed by GitHub
parent 8da6ae49c3
commit 8372be2828

@@ -743,7 +743,7 @@ class FusedMoEModularKernel(torch.nn.Module):
             1,
             (
                 M
-                if not self.fused_experts.supports_chunking()
+                if not self.fused_experts.enable_chunking()
                 else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE)
             ),
         )
@@ -786,7 +786,7 @@ class FusedMoEModularKernel(torch.nn.Module):
             is_forward_context_available()
             and get_forward_context().attn_metadata is None
         )
-        if is_profile_run and self.fused_experts.supports_chunking() and self.is_dp_ep:
+        if is_profile_run and self.fused_experts.enable_chunking() and self.is_dp_ep:
            max_workspace_13, max_workspace_2, max_fused_out_shape = (
                self.fused_experts.workspace_shapes(
                    envs.VLLM_FUSED_MOE_CHUNK_SIZE,
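
For reference, the distinction behind the rename: supports_chunking() reports whether a kernel is able to process activations in chunks at all, while enable_chunking() also accounts for whether chunking is actually turned on, which is what makes disabling it possible. Below is a minimal sketch of that relationship, not the vLLM implementation; the base-class name, the environment-variable name, and its default are assumptions made for illustration.

import abc
import os


class FusedExpertsBase(abc.ABC):  # hypothetical base class, for illustration only
    @abc.abstractmethod
    def supports_chunking(self) -> bool:
        """Capability: whether this kernel can process activations in chunks."""

    def enable_chunking(self) -> bool:
        # Hypothetical toggle; the actual flag wiring in vLLM may differ.
        # Chunking is used only when the kernel supports it AND the user
        # has not disabled it.
        chunking_on = os.environ.get("FUSED_MOE_CHUNKING", "1") == "1"
        return chunking_on and self.supports_chunking()

Call sites like the two hunks above then branch on enable_chunking() rather than the raw capability check, so turning the toggle off makes the kernel fall back to processing all M tokens in a single pass instead of chunks of VLLM_FUSED_MOE_CHUNK_SIZE.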