From 889f8bb250d498dcd38d7bcf58fb3c9e50d54d14 Mon Sep 17 00:00:00 2001 From: wz1qqx <55830058+wz1qqx@users.noreply.github.com> Date: Fri, 19 Dec 2025 03:09:51 +0800 Subject: [PATCH] [BugFix]Reclaim resources to prevent memory leaks when use LMCacheMPConnector (#30745) Signed-off-by: wz1qqx Co-authored-by: wz1qqx --- .../lmcache_integration/multi_process_adapter.py | 8 ++++++++ .../kv_connector/v1/lmcache_mp_connector.py | 16 ++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py index 6656b5a25f83d..9db4dedf48b7b 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py @@ -147,6 +147,14 @@ class LMCacheMPSchedulerAdapter: """ return self.blocks_in_chunk + def _cleanup_lookup_result(self, request_id: str) -> None: + """ + Clean up lookup future for a finished request to prevent memory leak. + Args: + request_id: The ID of the finished request. 
+ """ + self.lookup_futures.pop(request_id, None) + # Helper functions def _create_key(self, block_hash: bytes) -> IPCCacheEngineKey: """Convert a block hash to an IPC cache engine key""" diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py index 995708b89bc26..29166be62c242 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py @@ -701,6 +701,8 @@ class LMCacheMPConnector(KVConnectorBase_V1): if condition else LMCacheMPRequestState.READY ) + # Clean up lookup future in scheduler adapter + self.scheduler_adapter._cleanup_lookup_result(request.request_id) def build_connector_meta( self, scheduler_output: SchedulerOutput @@ -754,6 +756,8 @@ class LMCacheMPConnector(KVConnectorBase_V1): Optional KVTransferParams to be included in the request outputs returned by the engine. """ + # Clean up request tracker to prevent memory leak + self._cleanup_request_tracker(request.request_id) return True, None def take_events(self) -> Iterable["KVCacheEvent"]: @@ -915,3 +919,15 @@ class LMCacheMPConnector(KVConnectorBase_V1): new_tracker = LMCacheMPRequestTracker(request) self.request_trackers[request_id] = new_tracker return self.request_trackers[request_id] + + def _cleanup_request_tracker(self, request_id: str) -> None: + """ + Clean up the request tracker for a request. + This should be called when a request is finished to prevent memory leak. + """ + # Clean up request tracker + if self.request_trackers.pop(request_id, None): + logger.debug( + "[KVConnector] Cleaned up request_tracker for request %s", + request_id, + )