From 8372be2828f16dd339b24d46cb6142c9d0afd004 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Thu, 18 Dec 2025 01:02:38 -0800 Subject: [PATCH] [moe] Use enable_chunking func (to support disabling chunking) (#29935) Signed-off-by: Ming Yang --- vllm/model_executor/layers/fused_moe/modular_kernel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index b0834e861338f..25308b3106a44 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -743,7 +743,7 @@ class FusedMoEModularKernel(torch.nn.Module): 1, ( M - if not self.fused_experts.supports_chunking() + if not self.fused_experts.enable_chunking() else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE) ), ) @@ -786,7 +786,7 @@ class FusedMoEModularKernel(torch.nn.Module): is_forward_context_available() and get_forward_context().attn_metadata is None ) - if is_profile_run and self.fused_experts.supports_chunking() and self.is_dp_ep: + if is_profile_run and self.fused_experts.enable_chunking() and self.is_dp_ep: max_workspace_13, max_workspace_2, max_fused_out_shape = ( self.fused_experts.workspace_shapes( envs.VLLM_FUSED_MOE_CHUNK_SIZE,