[DP] Support api-server-count > 0 in hybrid DP LB mode (#21510)

Signed-off-by: Nick Hill <nhill@redhat.com>
2025-12-15 20:35:57 +08:00 · 2025-07-25 04:18:16 +01:00 · 2025-07-25 04:18:16 +01:00 · 9c8b2c2a8a
commit 9c8b2c2a8a
parent 2212cd6cfb
2 changed files with 5 additions and 9 deletions
--- a/tests/v1/test_hybrid_lb_dp.py
+++ b/tests/v1/test_hybrid_lb_dp.py
@@ -147,7 +147,7 @@ def default_server_args():
    ]
-@pytest.fixture(scope="module", params=[1])  # Only 1 API server for now
+@pytest.fixture(scope="module", params=[1, 4])
 def servers(request, default_server_args):
    api_server_count = request.param
    with HybridLBServerManager(MODEL_NAME, DP_SIZE, api_server_count,
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@@ -165,18 +165,14 @@ def run_multi_api_server(args: argparse.Namespace):
                " api_server_count > 1")
            model_config.disable_mm_preprocessor_cache = True
-        if vllm_config.parallel_config.data_parallel_hybrid_lb:
-            raise NotImplementedError(
-                "Hybrid load balancing with --api-server-count > 0"
-                "is not yet supported.")
    executor_class = Executor.get_class(vllm_config)
    log_stats = not engine_args.disable_log_stats
    parallel_config = vllm_config.parallel_config
    dp_rank = parallel_config.data_parallel_rank
    external_dp_lb = parallel_config.data_parallel_external_lb
-    assert external_dp_lb or dp_rank == 0
+    hybrid_dp_lb = parallel_config.data_parallel_hybrid_lb
+    assert external_dp_lb or hybrid_dp_lb or dp_rank == 0
    api_server_manager: Optional[APIServerProcessManager] = None
@@ -196,12 +192,12 @@ def run_multi_api_server(args: argparse.Namespace):
            stats_update_address=coordinator.get_stats_publish_address()
            if coordinator else None)
-        # For dp ranks > 0 in external DP LB mode, we must delay the
+        # For dp ranks > 0 in external/hybrid DP LB modes, we must delay the
        # start of the API servers until the local engine is started
        # (after the launcher context manager exits),
        # since we get the front-end stats update address from the coordinator
        # via the handshake with the local engine.
-        if dp_rank == 0 or not external_dp_lb:
+        if dp_rank == 0 or not (external_dp_lb or hybrid_dp_lb):
            # Start API servers using the manager.
            api_server_manager = APIServerProcessManager(
                **api_server_manager_kwargs)