From cda92307c145e7722cdc33e6d26e105eeb22b882 Mon Sep 17 00:00:00 2001 From: Jiayi Yao <82156730+YaoJiayi@users.noreply.github.com> Date: Tue, 17 Jun 2025 12:57:54 -0700 Subject: [PATCH] [Misc] Update lmcache connector with the latest connector apis (#19441) Signed-off-by: YaoJiayi <120040070@link.cuhk.edu.cn> --- .../kv_connector/v1/lmcache_connector.py | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py index cc1f4ba35642..e838ac2499c0 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Optional import torch from lmcache.integration.vllm.vllm_v1_adapter import LMCacheConnectorV1Impl @@ -87,6 +87,22 @@ class LMCacheConnectorV1(KVConnectorBase_V1): """ self._lmcache_engine.wait_for_save() + def get_finished( + self, finished_req_ids: set[str] + ) -> tuple[Optional[set[str]], Optional[set[str]]]: + """ + Notifies worker-side connector ids of requests that have + finished generating tokens. + + Returns: + ids of requests that have finished asynchronous transfer + (requests that previously returned True from request_finished()), + tuple of (sending/saving ids, recving/loading ids). + The finished saves/sends req ids must belong to a set provided in a + call to this method (this call or a prior one). + """ + return self._lmcache_engine.get_finished(finished_req_ids) + # ============================== # Scheduler-side methods # ============================== @@ -132,3 +148,20 @@ class LMCacheConnectorV1(KVConnectorBase_V1): scheduler_output (SchedulerOutput): the scheduler output object. """ return self._lmcache_engine.build_connector_meta(scheduler_output) + + def request_finished( + self, + request: "Request", + block_ids: list[int], + ) -> tuple[bool, Optional[dict[str, Any]]]: + """ + Called when a request has finished, before its blocks are freed. + + Returns: + True if the request is being saved/sent asynchronously and blocks + should not be freed until the request_id is returned from + get_finished(). + Optional KVTransferParams to be included in the request outputs + returned by the engine. + """ + return self._lmcache_engine.request_finished(request, block_ids)