mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-09 23:55:40 +08:00
refactor ux
Signed-off-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
parent
6206a06a84
commit
c22990470f
@ -295,6 +295,7 @@ class EngineArgs:
|
|||||||
tensor_parallel_size: int = ParallelConfig.tensor_parallel_size
|
tensor_parallel_size: int = ParallelConfig.tensor_parallel_size
|
||||||
data_parallel_size: int = ParallelConfig.data_parallel_size
|
data_parallel_size: int = ParallelConfig.data_parallel_size
|
||||||
data_parallel_rank: Optional[int] = None
|
data_parallel_rank: Optional[int] = None
|
||||||
|
data_parallel_start_rank: Optional[int] = None
|
||||||
data_parallel_size_local: Optional[int] = None
|
data_parallel_size_local: Optional[int] = None
|
||||||
data_parallel_address: Optional[str] = None
|
data_parallel_address: Optional[str] = None
|
||||||
data_parallel_rpc_port: Optional[int] = None
|
data_parallel_rpc_port: Optional[int] = None
|
||||||
@ -606,6 +607,11 @@ class EngineArgs:
|
|||||||
type=int,
|
type=int,
|
||||||
help='Data parallel rank of this instance. '
|
help='Data parallel rank of this instance. '
|
||||||
'When set, enables external load balancer mode.')
|
'When set, enables external load balancer mode.')
|
||||||
|
parallel_group.add_argument('--data-parallel-start-rank',
|
||||||
|
'-dpr',
|
||||||
|
type=int,
|
||||||
|
help='Starting data parallel rank '
|
||||||
|
'for secondary nodes.')
|
||||||
parallel_group.add_argument('--data-parallel-size-local',
|
parallel_group.add_argument('--data-parallel-size-local',
|
||||||
'-dpl',
|
'-dpl',
|
||||||
type=int,
|
type=int,
|
||||||
@ -1091,19 +1097,36 @@ class EngineArgs:
|
|||||||
# but we should not do this here.
|
# but we should not do this here.
|
||||||
placement_group = ray.util.get_current_placement_group()
|
placement_group = ray.util.get_current_placement_group()
|
||||||
|
|
||||||
# data_parallel_external_lb = self.data_parallel_rank is not None
|
# Organize --data-parallel-start-rank and --data-parallel-rank.
|
||||||
# if data_parallel_external_lb:
|
if self.data_parallel_start_rank is not None:
|
||||||
# assert self.data_parallel_size_local in (1, None), (
|
if self.data_parallel_rank is not None:
|
||||||
# "data_parallel_size_local must be 1 when data_parallel_rank "
|
raise ValueError(
|
||||||
# "is set")
|
"Found --data-parallel-rank and --data-parallel-start-rank. "
|
||||||
# data_parallel_size_local = 1
|
"Only one should be set (use --data-parallel-start-rank).")
|
||||||
# elif self.data_parallel_size_local is not None:
|
else:
|
||||||
data_parallel_external_lb = False
|
self.data_parallel_rank = self.data_parallel_start_rank
|
||||||
if self.data_parallel_size_local is not None:
|
|
||||||
|
# Validate External LB.
|
||||||
|
data_parallel_external_lb = True
|
||||||
|
if data_parallel_external_lb:
|
||||||
|
if self.data_parallel_size_local is None:
|
||||||
|
raise ValueError(
|
||||||
|
"With external LB, --data-parallel-size-local must be set."
|
||||||
|
)
|
||||||
|
if self.data_parallel_size_local >= self.data_parallel_size:
|
||||||
|
raise ValueError(
|
||||||
|
"With external LB, --data-parallel-size-local must be less "
|
||||||
|
"than --data-parallel-size.")
|
||||||
|
if (self.data_parallel_rank is not None
|
||||||
|
and self.data_parallel_size_local > 1):
|
||||||
|
raise ValueError(
|
||||||
|
"With --data-parallel-size-local > 1, use --data-parallel-start-rank instead of "
|
||||||
|
"--data-parallel-rank")
|
||||||
data_parallel_size_local = self.data_parallel_size_local
|
data_parallel_size_local = self.data_parallel_size_local
|
||||||
else:
|
|
||||||
# Local DP size defaults to global DP size if not set.
|
# Local DP size defaults to global DP size if not set.
|
||||||
data_parallel_size_local = self.data_parallel_size
|
data_parallel_size_local = (self.data_parallel_size_local
|
||||||
|
or self.data_parallel_size)
|
||||||
|
|
||||||
# DP address, used in multi-node case for torch distributed group
|
# DP address, used in multi-node case for torch distributed group
|
||||||
# and ZMQ sockets.
|
# and ZMQ sockets.
|
||||||
|
|||||||
@ -45,11 +45,6 @@ class ServeSubcommand(CLISubcommand):
|
|||||||
if args.headless or args.api_server_count < 1:
|
if args.headless or args.api_server_count < 1:
|
||||||
run_headless(args)
|
run_headless(args)
|
||||||
else:
|
else:
|
||||||
if args.data_parallel_start_rank:
|
|
||||||
raise ValueError(
|
|
||||||
"data_parallel_start_rank is only applicable "
|
|
||||||
"in headless mode. "
|
|
||||||
"Add --headless flag to enable headless mode.")
|
|
||||||
if args.api_server_count > 1:
|
if args.api_server_count > 1:
|
||||||
run_multi_api_server(args)
|
run_multi_api_server(args)
|
||||||
else:
|
else:
|
||||||
@ -122,7 +117,7 @@ def run_headless(args: argparse.Namespace):
|
|||||||
engine_manager = CoreEngineProcManager(
|
engine_manager = CoreEngineProcManager(
|
||||||
target_fn=EngineCoreProc.run_engine_core,
|
target_fn=EngineCoreProc.run_engine_core,
|
||||||
local_engine_count=local_engine_count,
|
local_engine_count=local_engine_count,
|
||||||
start_index=args.data_parallel_start_rank,
|
start_index=vllm_config.parallel_config.data_parallel_rank,
|
||||||
local_start_index=0,
|
local_start_index=0,
|
||||||
vllm_config=vllm_config,
|
vllm_config=vllm_config,
|
||||||
local_client=False,
|
local_client=False,
|
||||||
|
|||||||
@ -253,13 +253,6 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
|||||||
default=False,
|
default=False,
|
||||||
help="Run in headless mode. See multi-node data parallel "
|
help="Run in headless mode. See multi-node data parallel "
|
||||||
"documentation for more details.")
|
"documentation for more details.")
|
||||||
parser.add_argument(
|
|
||||||
"--data-parallel-start-rank",
|
|
||||||
"-dpr",
|
|
||||||
type=int,
|
|
||||||
default=0,
|
|
||||||
help="Starting data parallel rank for secondary nodes. "
|
|
||||||
"Requires --headless.")
|
|
||||||
parser.add_argument("--api-server-count",
|
parser.add_argument("--api-server-count",
|
||||||
"-asc",
|
"-asc",
|
||||||
type=int,
|
type=int,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user