[BugFix] Fix initial DP request load imbalance (#22910)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill 2025-08-14 15:20:28 -07:00 committed by GitHub
parent ebcce2cd36
commit 919234fe17
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -965,7 +965,7 @@ class DPAsyncMPClient(AsyncMPClient):
         # List of [waiting, running] pair per engine.
         # Used only by DPLBAsyncMPClient subclass.
-        self.lb_engines: list[list[int]] = []
+        self.lb_engines: list[list[int]] = [[0, 0] for _ in self.core_engines]
         self.first_req_sock_addr = get_open_zmq_inproc_path()
         self.first_req_send_socket = self.resources.first_req_send_socket = (
@@ -1121,10 +1121,8 @@ class DPLBAsyncMPClient(DPAsyncMPClient):
     def get_core_engine_for_request(
             self, request: EngineCoreRequest) -> EngineIdentity:
         # Engines are in rank order.
-        current_counts = self.lb_engines
         if (eng_index := request.data_parallel_rank) is None:
-            if not current_counts:
-                return self.core_engine
+            current_counts = self.lb_engines
             # TODO use P2C alg for larger DP sizes
             num_engines = len(current_counts)
             min_score = sys.maxsize