Signed-off-by: Robert Shaw <robshaw@redhat.com>
Robert Shaw 2025-07-20 23:32:49 +00:00
parent d327a6bed5
commit ec86e797da
2 changed files with 14 additions and 30 deletions

View File

@@ -1892,16 +1892,15 @@ class ParallelConfig:
data_parallel_external_lb: bool = False
"""Whether to use "external" DP LB mode. Applies only to online serving
and when data_parallel_size > 0. This is useful for a "one-pod-per-rank"
wide-EP setup in Kubernetes. Set implicitly when data_parallel_rank
wide-EP setup in Kubernetes. Set implicitly when --data-parallel-rank
is provided explicitly to vllm serve."""
data_parallel_hybrid_lb: bool = False
"""Whether to use "hybrid" DP LB mode. Applies only to online serving
and when data_parallel_size > 0. Enables running an AsyncLLM
and API server on a "per-node" basis where vLLM load balances
between local data parallel ranks, but an external LB balances
between vLLM nodes/replicas. This is useful for a "one-pod-per-node"
wide-EP setup in Kubernetes. Set explicitly by the user.
"""
between vLLM nodes/replicas. Set explicitly in conjunction with
--data-parallel-start-rank."""
enable_expert_parallel: bool = False
"""Use expert parallelism instead of tensor parallelism for MoE layers."""
enable_eplb: bool = False
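
Aside (not part of this diff): to make the two LB modes above concrete, the sketch below shows how the ParallelConfig/EngineArgs fields might end up for a hypothetical deployment with 8 DP ranks spread over 2 nodes; all concrete numbers are illustrative and not taken from this commit.

# External LB ("one-pod-per-rank"): each vllm serve process owns exactly one
# DP rank, so --data-parallel-rank is passed explicitly and external LB mode
# is implied. Hypothetical values for the pod serving rank 3:
external_lb_pod = dict(
    data_parallel_size=8,          # global number of DP ranks
    data_parallel_size_local=1,    # this pod serves a single rank
    data_parallel_rank=3,          # explicit rank -> external LB implied
    data_parallel_external_lb=True,
    data_parallel_hybrid_lb=False,
)

# Hybrid LB ("one-pod-per-node"): vLLM balances between the local ranks of a
# node while an external LB balances across nodes. Hypothetical values for the
# second node (ranks 4-7):
hybrid_lb_node = dict(
    data_parallel_size=8,          # global number of DP ranks
    data_parallel_size_local=4,    # ranks hosted on this node
    data_parallel_start_rank=4,    # first rank owned by this node
    data_parallel_external_lb=False,
    data_parallel_hybrid_lb=True,  # set explicitly by the user
)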

View File

@@ -1097,32 +1097,17 @@ class EngineArgs:
# but we should not do this here.
placement_group = ray.util.get_current_placement_group()
# Reconcile --data-parallel-start-rank and --data-parallel-rank.
if self.data_parallel_start_rank is not None:
if self.data_parallel_rank is not None:
raise ValueError(
"Found --data-parallel-rank and --data-parallel-start-rank."
"Only one should be set (use --data-parallel-start-rank).")
else:
self.data_parallel_rank = self.data_parallel_start_rank
# Validate hybrid LB.
data_parallel_hybrid_lb = self.data_parallel_hybrid_lb
if data_parallel_hybrid_lb:
if self.data_parallel_size_local is None:
raise ValueError(
"With hybrid LB, --data-parallel-size-local must be set."
)
if self.data_parallel_size_local >= self.data_parallel_size:
raise ValueError(
"With hybrid LB, --data-parallel-size-local must be "
"less than --data-parallel-size.")
data_parallel_external_lb = self.data_parallel_rank is not None
if data_parallel_external_lb:
assert self.data_parallel_size_local in (1, None), (
"data_parallel_size_local must be 1 when data_parallel_rank "
"is set")
data_parallel_size_local = 1
elif self.data_parallel_size_local is not None:
data_parallel_size_local = self.data_parallel_size_local
# Local DP size defaults to global DP size if not set.
data_parallel_size_local = (self.data_parallel_size_local
or self.data_parallel_size)
else:
# Local DP size defaults to global DP size if not set.
data_parallel_size_local = self.data_parallel_size
# DP address, used in multi-node case for torch distributed group
# and ZMQ sockets.
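
Aside (not part of this diff): the hunk above reconciles the rank flags and resolves data_parallel_size_local. Below is a condensed, standalone sketch of those constraints; the function name resolve_dp_settings, its flat signature, and the exact ordering of checks are hypothetical, not vLLM code.

from typing import Optional

def resolve_dp_settings(dp_size: int,
                        dp_size_local: Optional[int],
                        dp_rank: Optional[int],
                        dp_start_rank: Optional[int],
                        hybrid_lb: bool) -> tuple[int, bool]:
    """Return (data_parallel_size_local, data_parallel_external_lb)."""
    # --data-parallel-rank and --data-parallel-start-rank are mutually exclusive.
    if dp_rank is not None and dp_start_rank is not None:
        raise ValueError("Set only one of --data-parallel-rank and "
                         "--data-parallel-start-rank.")
    # An explicit --data-parallel-rank implies external LB: this process serves
    # exactly one rank, so the local DP size must be 1 (or left unset).
    if dp_rank is not None:
        if dp_size_local not in (1, None):
            raise ValueError("--data-parallel-size-local must be 1 when "
                             "--data-parallel-rank is set.")
        return 1, True
    # Hybrid LB needs an explicit local size strictly smaller than the global size.
    if hybrid_lb:
        if dp_size_local is None or dp_size_local >= dp_size:
            raise ValueError("With hybrid LB, --data-parallel-size-local must be "
                             "set and less than --data-parallel-size.")
        return dp_size_local, False
    # Otherwise the local DP size defaults to the global DP size.
    return dp_size_local or dp_size, False
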
@@ -1177,7 +1162,7 @@ class EngineArgs:
data_parallel_master_ip=data_parallel_address,
data_parallel_rpc_port=data_parallel_rpc_port,
data_parallel_backend=self.data_parallel_backend,
data_parallel_hybrid_lb=False,
data_parallel_hybrid_lb=True,
enable_expert_parallel=self.enable_expert_parallel,
enable_eplb=self.enable_eplb,
num_redundant_experts=self.num_redundant_experts,
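
Aside (not part of this diff): continuing the hypothetical sketch above, the resolved values would then be passed alongside the keyword arguments shown in this last hunk; the call below is purely illustrative.

# Hybrid LB node owning ranks 4-7 of an 8-rank deployment (illustrative):
dp_size_local, external_lb = resolve_dp_settings(
    dp_size=8, dp_size_local=4, dp_rank=None, dp_start_rank=4, hybrid_lb=True)
# dp_size_local == 4 and external_lb is False; these would accompany the
# keyword arguments shown above, e.g. data_parallel_hybrid_lb and
# data_parallel_external_lb, when constructing the parallel config.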