[moe] Use enable_chunking func (to support disabling chunking) (#29935)

Signed-off-by: Ming Yang <minos.future@gmail.com>
2026-03-16 14:07:13 +08:00 · 2025-12-18 01:02:38 -08:00 · 2025-12-18 01:02:38 -08:00 · 8372be2828
commit 8372be2828
parent 8da6ae49c3
1 changed files with 2 additions and 2 deletions
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -743,7 +743,7 @@ class FusedMoEModularKernel(torch.nn.Module):
            1,
            (
                M
-                if not self.fused_experts.supports_chunking()
+                if not self.fused_experts.enable_chunking()
                else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE)
            ),
        )
@@ -786,7 +786,7 @@ class FusedMoEModularKernel(torch.nn.Module):
            is_forward_context_available()
            and get_forward_context().attn_metadata is None
        )
-        if is_profile_run and self.fused_experts.supports_chunking() and self.is_dp_ep:
+        if is_profile_run and self.fused_experts.enable_chunking() and self.is_dp_ep:
            max_workspace_13, max_workspace_2, max_fused_out_shape = (
                self.fused_experts.workspace_shapes(
                    envs.VLLM_FUSED_MOE_CHUNK_SIZE,