mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-02 15:24:28 +08:00
[BugFix] Fix initial DP request load imbalance (#22910)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
ebcce2cd36
commit
919234fe17
@ -965,7 +965,7 @@ class DPAsyncMPClient(AsyncMPClient):
|
|||||||
|
|
||||||
# List of [waiting, running] pair per engine.
|
# List of [waiting, running] pair per engine.
|
||||||
# Used only by DPLBAsyncMPClient subclass.
|
# Used only by DPLBAsyncMPClient subclass.
|
||||||
self.lb_engines: list[list[int]] = []
|
self.lb_engines: list[list[int]] = [[0, 0] for _ in self.core_engines]
|
||||||
|
|
||||||
self.first_req_sock_addr = get_open_zmq_inproc_path()
|
self.first_req_sock_addr = get_open_zmq_inproc_path()
|
||||||
self.first_req_send_socket = self.resources.first_req_send_socket = (
|
self.first_req_send_socket = self.resources.first_req_send_socket = (
|
||||||
@ -1121,10 +1121,8 @@ class DPLBAsyncMPClient(DPAsyncMPClient):
|
|||||||
def get_core_engine_for_request(
|
def get_core_engine_for_request(
|
||||||
self, request: EngineCoreRequest) -> EngineIdentity:
|
self, request: EngineCoreRequest) -> EngineIdentity:
|
||||||
# Engines are in rank order.
|
# Engines are in rank order.
|
||||||
current_counts = self.lb_engines
|
|
||||||
if (eng_index := request.data_parallel_rank) is None:
|
if (eng_index := request.data_parallel_rank) is None:
|
||||||
if not current_counts:
|
current_counts = self.lb_engines
|
||||||
return self.core_engine
|
|
||||||
# TODO use P2C alg for larger DP sizes
|
# TODO use P2C alg for larger DP sizes
|
||||||
num_engines = len(current_counts)
|
num_engines = len(current_counts)
|
||||||
min_score = sys.maxsize
|
min_score = sys.maxsize
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user