[DP] Support api-server-count > 0 in hybrid DP LB mode (#21510)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill 2025-07-25 04:18:16 +01:00 committed by GitHub
parent 2212cd6cfb
commit 9c8b2c2a8a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 9 deletions

View File

@@ -147,7 +147,7 @@ def default_server_args():
]
@pytest.fixture(scope="module", params=[1]) # Only 1 API server for now
@pytest.fixture(scope="module", params=[1, 4])
def servers(request, default_server_args):
api_server_count = request.param
with HybridLBServerManager(MODEL_NAME, DP_SIZE, api_server_count,

View File

@@ -165,18 +165,14 @@ def run_multi_api_server(args: argparse.Namespace):
" api_server_count > 1")
model_config.disable_mm_preprocessor_cache = True
if vllm_config.parallel_config.data_parallel_hybrid_lb:
raise NotImplementedError(
"Hybrid load balancing with --api-server-count > 0"
"is not yet supported.")
executor_class = Executor.get_class(vllm_config)
log_stats = not engine_args.disable_log_stats
parallel_config = vllm_config.parallel_config
dp_rank = parallel_config.data_parallel_rank
external_dp_lb = parallel_config.data_parallel_external_lb
assert external_dp_lb or dp_rank == 0
hybrid_dp_lb = parallel_config.data_parallel_hybrid_lb
assert external_dp_lb or hybrid_dp_lb or dp_rank == 0
api_server_manager: Optional[APIServerProcessManager] = None
@@ -196,12 +192,12 @@ def run_multi_api_server(args: argparse.Namespace):
stats_update_address=coordinator.get_stats_publish_address()
if coordinator else None)
# For dp ranks > 0 in external DP LB mode, we must delay the
# For dp ranks > 0 in external/hybrid DP LB modes, we must delay the
# start of the API servers until the local engine is started
# (after the launcher context manager exits),
# since we get the front-end stats update address from the coordinator
# via the handshake with the local engine.
if dp_rank == 0 or not external_dp_lb:
if dp_rank == 0 or not (external_dp_lb or hybrid_dp_lb):
# Start API servers using the manager.
api_server_manager = APIServerProcessManager(
**api_server_manager_kwargs)