mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 20:35:57 +08:00
[DP] Support api-server-count > 0 in hybrid DP LB mode (#21510)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
2212cd6cfb
commit
9c8b2c2a8a
@ -147,7 +147,7 @@ def default_server_args():
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module", params=[1]) # Only 1 API server for now
|
@pytest.fixture(scope="module", params=[1, 4])
|
||||||
def servers(request, default_server_args):
|
def servers(request, default_server_args):
|
||||||
api_server_count = request.param
|
api_server_count = request.param
|
||||||
with HybridLBServerManager(MODEL_NAME, DP_SIZE, api_server_count,
|
with HybridLBServerManager(MODEL_NAME, DP_SIZE, api_server_count,
|
||||||
|
|||||||
@ -165,18 +165,14 @@ def run_multi_api_server(args: argparse.Namespace):
|
|||||||
" api_server_count > 1")
|
" api_server_count > 1")
|
||||||
model_config.disable_mm_preprocessor_cache = True
|
model_config.disable_mm_preprocessor_cache = True
|
||||||
|
|
||||||
if vllm_config.parallel_config.data_parallel_hybrid_lb:
|
|
||||||
raise NotImplementedError(
|
|
||||||
"Hybrid load balancing with --api-server-count > 0"
|
|
||||||
"is not yet supported.")
|
|
||||||
|
|
||||||
executor_class = Executor.get_class(vllm_config)
|
executor_class = Executor.get_class(vllm_config)
|
||||||
log_stats = not engine_args.disable_log_stats
|
log_stats = not engine_args.disable_log_stats
|
||||||
|
|
||||||
parallel_config = vllm_config.parallel_config
|
parallel_config = vllm_config.parallel_config
|
||||||
dp_rank = parallel_config.data_parallel_rank
|
dp_rank = parallel_config.data_parallel_rank
|
||||||
external_dp_lb = parallel_config.data_parallel_external_lb
|
external_dp_lb = parallel_config.data_parallel_external_lb
|
||||||
assert external_dp_lb or dp_rank == 0
|
hybrid_dp_lb = parallel_config.data_parallel_hybrid_lb
|
||||||
|
assert external_dp_lb or hybrid_dp_lb or dp_rank == 0
|
||||||
|
|
||||||
api_server_manager: Optional[APIServerProcessManager] = None
|
api_server_manager: Optional[APIServerProcessManager] = None
|
||||||
|
|
||||||
@ -196,12 +192,12 @@ def run_multi_api_server(args: argparse.Namespace):
|
|||||||
stats_update_address=coordinator.get_stats_publish_address()
|
stats_update_address=coordinator.get_stats_publish_address()
|
||||||
if coordinator else None)
|
if coordinator else None)
|
||||||
|
|
||||||
# For dp ranks > 0 in external DP LB mode, we must delay the
|
# For dp ranks > 0 in external/hybrid DP LB modes, we must delay the
|
||||||
# start of the API servers until the local engine is started
|
# start of the API servers until the local engine is started
|
||||||
# (after the launcher context manager exits),
|
# (after the launcher context manager exits),
|
||||||
# since we get the front-end stats update address from the coordinator
|
# since we get the front-end stats update address from the coordinator
|
||||||
# via the handshake with the local engine.
|
# via the handshake with the local engine.
|
||||||
if dp_rank == 0 or not external_dp_lb:
|
if dp_rank == 0 or not (external_dp_lb or hybrid_dp_lb):
|
||||||
# Start API servers using the manager.
|
# Start API servers using the manager.
|
||||||
api_server_manager = APIServerProcessManager(
|
api_server_manager = APIServerProcessManager(
|
||||||
**api_server_manager_kwargs)
|
**api_server_manager_kwargs)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user