mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-01 19:51:19 +08:00
[moe] Allow disabling DP chunking (#29936)
Signed-off-by: Ming Yang <minos.future@gmail.com>
This commit is contained in:
parent
f1599ca55d
commit
9d6235ca9a
@ -144,6 +144,7 @@ if TYPE_CHECKING:
|
|||||||
VLLM_DP_MASTER_IP: str = ""
|
VLLM_DP_MASTER_IP: str = ""
|
||||||
VLLM_DP_MASTER_PORT: int = 0
|
VLLM_DP_MASTER_PORT: int = 0
|
||||||
VLLM_MOE_DP_CHUNK_SIZE: int = 256
|
VLLM_MOE_DP_CHUNK_SIZE: int = 256
|
||||||
|
VLLM_ENABLE_MOE_DP_CHUNK: bool = True
|
||||||
VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
|
VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
|
||||||
VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict"
|
VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict"
|
||||||
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
|
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
|
||||||
@ -1101,6 +1102,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
# rank. All DP ranks process the activations in VLLM_MOE_DP_CHUNK_SIZE
|
# rank. All DP ranks process the activations in VLLM_MOE_DP_CHUNK_SIZE
|
||||||
# units.
|
# units.
|
||||||
"VLLM_MOE_DP_CHUNK_SIZE": lambda: int(os.getenv("VLLM_MOE_DP_CHUNK_SIZE", "256")),
|
"VLLM_MOE_DP_CHUNK_SIZE": lambda: int(os.getenv("VLLM_MOE_DP_CHUNK_SIZE", "256")),
|
||||||
|
"VLLM_ENABLE_MOE_DP_CHUNK": lambda: bool(
|
||||||
|
int(os.getenv("VLLM_ENABLE_MOE_DP_CHUNK", "1"))
|
||||||
|
),
|
||||||
# Randomize inputs during dummy runs when using Data Parallel
|
# Randomize inputs during dummy runs when using Data Parallel
|
||||||
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS": lambda: os.environ.get(
|
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS": lambda: os.environ.get(
|
||||||
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS", "0"
|
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS", "0"
|
||||||
|
|||||||
@ -753,7 +753,7 @@ class FusedMoE(CustomOp):
|
|||||||
self.moe_parallel_config.use_pplx_kernels
|
self.moe_parallel_config.use_pplx_kernels
|
||||||
or self.moe_parallel_config.use_deepep_ll_kernels
|
or self.moe_parallel_config.use_deepep_ll_kernels
|
||||||
or (self.dp_size > 1 and self.use_flashinfer_cutlass_kernels)
|
or (self.dp_size > 1 and self.use_flashinfer_cutlass_kernels)
|
||||||
)
|
) and envs.VLLM_ENABLE_MOE_DP_CHUNK
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_internal_router(self) -> bool:
|
def is_internal_router(self) -> bool:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user