diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py
index b0834e861338f..25308b3106a44 100644
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -743,7 +743,7 @@ class FusedMoEModularKernel(torch.nn.Module):
                 1,
                 (
                     M
-                    if not self.fused_experts.supports_chunking()
+                    if not self.fused_experts.enable_chunking()
                     else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE)
                 ),
             )
@@ -786,7 +786,7 @@ class FusedMoEModularKernel(torch.nn.Module):
             is_forward_context_available()
             and get_forward_context().attn_metadata is None
         )
-        if is_profile_run and self.fused_experts.supports_chunking() and self.is_dp_ep:
+        if is_profile_run and self.fused_experts.enable_chunking() and self.is_dp_ep:
            max_workspace_13, max_workspace_2, max_fused_out_shape = (
                self.fused_experts.workspace_shapes(
                    envs.VLLM_FUSED_MOE_CHUNK_SIZE,