From 919234fe17a701dfb5c7370df6cd1ea22202c4d7 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Thu, 14 Aug 2025 15:20:28 -0700 Subject: [PATCH] [BugFix] Fix initial DP request load imbalance (#22910) Signed-off-by: Nick Hill --- vllm/v1/engine/core_client.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index 5ffa555570a22..29ee0a9dfb1e2 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -965,7 +965,7 @@ class DPAsyncMPClient(AsyncMPClient): # List of [waiting, running] pair per engine. # Used only by DPLBAsyncMPClient subclass. - self.lb_engines: list[list[int]] = [] + self.lb_engines: list[list[int]] = [[0, 0] for _ in self.core_engines] self.first_req_sock_addr = get_open_zmq_inproc_path() self.first_req_send_socket = self.resources.first_req_send_socket = ( @@ -1121,10 +1121,8 @@ class DPLBAsyncMPClient(DPAsyncMPClient): def get_core_engine_for_request( self, request: EngineCoreRequest) -> EngineIdentity: # Engines are in rank order. - current_counts = self.lb_engines if (eng_index := request.data_parallel_rank) is None: - if not current_counts: - return self.core_engine + current_counts = self.lb_engines # TODO use P2C alg for larger DP sizes num_engines = len(current_counts) min_score = sys.maxsize