mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:54:56 +08:00
[P/D] Log warnings related to prefill KV expiry (#21753)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
e17a4d3bf9
commit
7d44c691b0
@ -1025,6 +1025,11 @@ class NixlConnectorWorker:
|
||||
# Sorted dict, oldest requests are put first so we can exit early.
|
||||
if now < expires:
|
||||
break
|
||||
count = self.consumer_notification_counts_by_req.pop(req_id, 0)
|
||||
logger.warning(
|
||||
"Releasing expired KV blocks for request %s which were "
|
||||
"retrieved by %d decode worker(s) within %d seconds.", req_id,
|
||||
count, envs.VLLM_NIXL_ABORT_REQUEST_TIMEOUT)
|
||||
del self._reqs_to_send[req_id]
|
||||
done_sending.add(req_id)
|
||||
|
||||
@ -1040,6 +1045,13 @@ class NixlConnectorWorker:
|
||||
for notifs in self.nixl_wrapper.get_new_notifs().values():
|
||||
for notif in notifs:
|
||||
req_id, tp_ratio = notif.decode("utf-8").rsplit(":", 1)
|
||||
if req_id not in self._reqs_to_send:
|
||||
logger.error(
|
||||
"Potentially invalid KV blocks for "
|
||||
"unrecognized request %s were retrieved by "
|
||||
"a decode worker. They may have expired.", req_id)
|
||||
continue
|
||||
|
||||
self.consumer_notification_counts_by_req[req_id] += 1
|
||||
# Wait for all consumers (D) to finish reading before freeing.
|
||||
if self.consumer_notification_counts_by_req[req_id] == int(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user