[moe] Allow disabling DP chunking (#29936)

Signed-off-by: Ming Yang <minos.future@gmail.com>
This commit is contained in:
Ming Yang 2025-12-08 16:29:36 -08:00 committed by GitHub
parent f1599ca55d
commit 9d6235ca9a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 1 deletions

View File

@@ -144,6 +144,7 @@ if TYPE_CHECKING:
VLLM_DP_MASTER_IP: str = ""
VLLM_DP_MASTER_PORT: int = 0
VLLM_MOE_DP_CHUNK_SIZE: int = 256
VLLM_ENABLE_MOE_DP_CHUNK: bool = True
VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict"
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
@@ -1101,6 +1102,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
# rank. All DP ranks process the activations in VLLM_MOE_DP_CHUNK_SIZE
# units.
"VLLM_MOE_DP_CHUNK_SIZE": lambda: int(os.getenv("VLLM_MOE_DP_CHUNK_SIZE", "256")),
"VLLM_ENABLE_MOE_DP_CHUNK": lambda: bool(
int(os.getenv("VLLM_ENABLE_MOE_DP_CHUNK", "1"))
),
# Randomize inputs during dummy runs when using Data Parallel
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS": lambda: os.environ.get(
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS", "0"

View File

@@ -753,7 +753,7 @@ class FusedMoE(CustomOp):
self.moe_parallel_config.use_pplx_kernels
or self.moe_parallel_config.use_deepep_ll_kernels
or (self.dp_size > 1 and self.use_flashinfer_cutlass_kernels)
)
) and envs.VLLM_ENABLE_MOE_DP_CHUNK
@property
def is_internal_router(self) -> bool: