mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-23 22:43:41 +08:00
refactor ux
Signed-off-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
parent
6206a06a84
commit
c22990470f
@ -295,6 +295,7 @@ class EngineArgs:
|
||||
tensor_parallel_size: int = ParallelConfig.tensor_parallel_size
|
||||
data_parallel_size: int = ParallelConfig.data_parallel_size
|
||||
data_parallel_rank: Optional[int] = None
|
||||
data_parallel_start_rank: Optional[int] = None
|
||||
data_parallel_size_local: Optional[int] = None
|
||||
data_parallel_address: Optional[str] = None
|
||||
data_parallel_rpc_port: Optional[int] = None
|
||||
@ -606,6 +607,11 @@ class EngineArgs:
|
||||
type=int,
|
||||
help='Data parallel rank of this instance. '
|
||||
'When set, enables external load balancer mode.')
|
||||
parallel_group.add_argument('--data-parallel-start-rank',
|
||||
'-dpr',
|
||||
type=int,
|
||||
help='Starting data parallel rank '
|
||||
'for secondary nodes.')
|
||||
parallel_group.add_argument('--data-parallel-size-local',
|
||||
'-dpl',
|
||||
type=int,
|
||||
@ -1091,19 +1097,36 @@ class EngineArgs:
|
||||
# but we should not do this here.
|
||||
placement_group = ray.util.get_current_placement_group()
|
||||
|
||||
# data_parallel_external_lb = self.data_parallel_rank is not None
|
||||
# if data_parallel_external_lb:
|
||||
# assert self.data_parallel_size_local in (1, None), (
|
||||
# "data_parallel_size_local must be 1 when data_parallel_rank "
|
||||
# "is set")
|
||||
# data_parallel_size_local = 1
|
||||
# elif self.data_parallel_size_local is not None:
|
||||
data_parallel_external_lb = False
|
||||
if self.data_parallel_size_local is not None:
|
||||
# Organize --data-parallel-start-rank and --data-parallel-rank.
|
||||
if self.data_parallel_start_rank is not None:
|
||||
if self.data_parallel_rank is not None:
|
||||
raise ValueError(
|
||||
"Found --data-parallel-rank and --data-parallel-start-rank. "
|
||||
"Only one should be set (use --data-parallel-start-rank).")
|
||||
else:
|
||||
self.data_parallel_rank = self.data_parallel_start_rank
|
||||
|
||||
# Validate External LB.
|
||||
data_parallel_external_lb = True
|
||||
if data_parallel_external_lb:
|
||||
if self.data_parallel_size_local is None:
|
||||
raise ValueError(
|
||||
"With external LB, --data-parallel-size-local must be set."
|
||||
)
|
||||
if self.data_parallel_size_local >= self.data_parallel_size:
|
||||
raise ValueError(
|
||||
"With external LB, --data-parallel-size-local must be less "
|
||||
"than --data-parallel-size.")
|
||||
if (self.data_parallel_rank is not None
|
||||
and self.data_parallel_size_local > 1):
|
||||
raise ValueError(
|
||||
"With --data-parallel-size-local > 1, use --data-parallel-start-rank "
|
||||
"instead of --data-parallel-rank.")
|
||||
data_parallel_size_local = self.data_parallel_size_local
|
||||
else:
|
||||
# Local DP size defaults to global DP size if not set.
|
||||
data_parallel_size_local = self.data_parallel_size
|
||||
|
||||
# Local DP size defaults to global DP size if not set.
|
||||
data_parallel_size_local = (self.data_parallel_size_local
|
||||
or self.data_parallel_size)
|
||||
|
||||
# DP address, used in multi-node case for torch distributed group
|
||||
# and ZMQ sockets.
|
||||
|
||||
@ -45,11 +45,6 @@ class ServeSubcommand(CLISubcommand):
|
||||
if args.headless or args.api_server_count < 1:
|
||||
run_headless(args)
|
||||
else:
|
||||
if args.data_parallel_start_rank:
|
||||
raise ValueError(
|
||||
"data_parallel_start_rank is only applicable "
|
||||
"in headless mode. "
|
||||
"Add --headless flag to enable headless mode.")
|
||||
if args.api_server_count > 1:
|
||||
run_multi_api_server(args)
|
||||
else:
|
||||
@ -122,7 +117,7 @@ def run_headless(args: argparse.Namespace):
|
||||
engine_manager = CoreEngineProcManager(
|
||||
target_fn=EngineCoreProc.run_engine_core,
|
||||
local_engine_count=local_engine_count,
|
||||
start_index=args.data_parallel_start_rank,
|
||||
start_index=vllm_config.parallel_config.data_parallel_rank,
|
||||
local_start_index=0,
|
||||
vllm_config=vllm_config,
|
||||
local_client=False,
|
||||
|
||||
@ -253,13 +253,6 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
default=False,
|
||||
help="Run in headless mode. See multi-node data parallel "
|
||||
"documentation for more details.")
|
||||
parser.add_argument(
|
||||
"--data-parallel-start-rank",
|
||||
"-dpr",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Starting data parallel rank for secondary nodes. "
|
||||
"Requires --headless.")
|
||||
parser.add_argument("--api-server-count",
|
||||
"-asc",
|
||||
type=int,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user