mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-24 03:23:39 +08:00
[moe] Allow disabling DP chunking (#29936)
Signed-off-by: Ming Yang <minos.future@gmail.com>
This commit is contained in:
parent
f1599ca55d
commit
9d6235ca9a
@ -144,6 +144,7 @@ if TYPE_CHECKING:
|
||||
VLLM_DP_MASTER_IP: str = ""
|
||||
VLLM_DP_MASTER_PORT: int = 0
|
||||
VLLM_MOE_DP_CHUNK_SIZE: int = 256
|
||||
VLLM_ENABLE_MOE_DP_CHUNK: bool = True
|
||||
VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
|
||||
VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict"
|
||||
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
|
||||
@ -1101,6 +1102,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
# rank. All DP ranks process the activations in VLLM_MOE_DP_CHUNK_SIZE
|
||||
# units.
|
||||
"VLLM_MOE_DP_CHUNK_SIZE": lambda: int(os.getenv("VLLM_MOE_DP_CHUNK_SIZE", "256")),
|
||||
# Toggle for MoE DP chunking. The value is parsed as an integer and then
# converted to bool: "0" disables chunking, any other integer enables it.
# When unset it defaults to "1" (enabled). A non-integer value such as
# "true" makes int() raise ValueError.
"VLLM_ENABLE_MOE_DP_CHUNK": lambda: bool(
|
||||
int(os.getenv("VLLM_ENABLE_MOE_DP_CHUNK", "1"))
|
||||
),
|
||||
# Randomize inputs during dummy runs when using Data Parallel
|
||||
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS": lambda: os.environ.get(
|
||||
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS", "0"
|
||||
|
||||
@ -753,7 +753,7 @@ class FusedMoE(CustomOp):
|
||||
self.moe_parallel_config.use_pplx_kernels
|
||||
or self.moe_parallel_config.use_deepep_ll_kernels
|
||||
or (self.dp_size > 1 and self.use_flashinfer_cutlass_kernels)
|
||||
)
|
||||
) and envs.VLLM_ENABLE_MOE_DP_CHUNK
|
||||
|
||||
@property
|
||||
def is_internal_router(self) -> bool:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user