mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-03 19:29:10 +08:00
fix lb issues
Signed-off-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
parent
d2d54e9c72
commit
4438796b48
@ -326,6 +326,7 @@ class AsyncLLM(EngineClient):
|
|||||||
# task switching under load which helps performance).
|
# task switching under load which helps performance).
|
||||||
out = q.get_nowait() or await q.get()
|
out = q.get_nowait() or await q.get()
|
||||||
|
|
||||||
|
|
||||||
# Note: both OutputProcessor and EngineCore handle their
|
# Note: both OutputProcessor and EngineCore handle their
|
||||||
# own request cleanup based on finished.
|
# own request cleanup based on finished.
|
||||||
finished = out.finished
|
finished = out.finished
|
||||||
|
|||||||
@ -709,8 +709,12 @@ class AsyncMPClient(MPClient):
|
|||||||
assert output_socket is not None
|
assert output_socket is not None
|
||||||
|
|
||||||
async def process_outputs_socket():
|
async def process_outputs_socket():
|
||||||
|
i = 0
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
|
if i % 100 == 0:
|
||||||
|
logger.info(f"{i=}")
|
||||||
|
i += 1
|
||||||
frames = await output_socket.recv_multipart(copy=False)
|
frames = await output_socket.recv_multipart(copy=False)
|
||||||
resources.validate_alive(frames)
|
resources.validate_alive(frames)
|
||||||
outputs: EngineCoreOutputs = decoder.decode(frames)
|
outputs: EngineCoreOutputs = decoder.decode(frames)
|
||||||
@ -895,6 +899,8 @@ class DPAsyncMPClient(AsyncMPClient):
|
|||||||
return
|
return
|
||||||
|
|
||||||
assert self.stats_update_address is not None
|
assert self.stats_update_address is not None
|
||||||
|
dp_start_rank = self.vllm_config.parallel_config.data_parallel_rank
|
||||||
|
dp_end_rank = dp_start_rank + self.vllm_config.parallel_config.data_parallel_size_local
|
||||||
|
|
||||||
async def run_engine_stats_update_task():
|
async def run_engine_stats_update_task():
|
||||||
with make_zmq_socket(self.ctx, self.stats_update_address,
|
with make_zmq_socket(self.ctx, self.stats_update_address,
|
||||||
@ -959,7 +965,7 @@ class DPAsyncMPClient(AsyncMPClient):
|
|||||||
counts, wave, running = msgspec.msgpack.decode(buf)
|
counts, wave, running = msgspec.msgpack.decode(buf)
|
||||||
self.current_wave = wave
|
self.current_wave = wave
|
||||||
self.engines_running = running
|
self.engines_running = running
|
||||||
self.lb_engines = counts
|
self.lb_engines = counts[dp_start_rank:dp_end_rank]
|
||||||
|
|
||||||
resources.stats_update_task = asyncio.create_task(
|
resources.stats_update_task = asyncio.create_task(
|
||||||
run_engine_stats_update_task())
|
run_engine_stats_update_task())
|
||||||
@ -973,6 +979,7 @@ class DPAsyncMPClient(AsyncMPClient):
|
|||||||
chosen_engine = self.get_core_engine_for_request(request)
|
chosen_engine = self.get_core_engine_for_request(request)
|
||||||
to_await = self._send_input(EngineCoreRequestType.ADD, request,
|
to_await = self._send_input(EngineCoreRequestType.ADD, request,
|
||||||
chosen_engine)
|
chosen_engine)
|
||||||
|
|
||||||
if not self.engines_running:
|
if not self.engines_running:
|
||||||
# Notify coordinator that we're sending a request
|
# Notify coordinator that we're sending a request
|
||||||
req_msg = msgspec.msgpack.encode(("FIRST_REQ", chosen_engine))
|
req_msg = msgspec.msgpack.encode(("FIRST_REQ", chosen_engine))
|
||||||
@ -1007,6 +1014,9 @@ class DPLBAsyncMPClient(DPAsyncMPClient):
|
|||||||
|
|
||||||
def get_core_engine_for_request(
|
def get_core_engine_for_request(
|
||||||
self, request: EngineCoreRequest) -> EngineIdentity:
|
self, request: EngineCoreRequest) -> EngineIdentity:
|
||||||
|
logger.info(f"{self.lb_engines=} | {request.data_parallel_rank=}")
|
||||||
|
logger.info(f"{self.core_engine=}")
|
||||||
|
logger.info(f"{self.client_index=}")
|
||||||
# Engines are in rank order.
|
# Engines are in rank order.
|
||||||
if (eng_index := request.data_parallel_rank) is None:
|
if (eng_index := request.data_parallel_rank) is None:
|
||||||
if not self.lb_engines:
|
if not self.lb_engines:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user