mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-26 12:36:32 +08:00
[BugFix] Reclaim resources to prevent memory leaks when using LMCacheMPConnector (#30745)
Signed-off-by: wz1qqx <ziqi.wang@novita.ai> Co-authored-by: wz1qqx <ziqi.wang@novita.ai>
This commit is contained in:
parent
058926d48c
commit
889f8bb250
@ -147,6 +147,14 @@ class LMCacheMPSchedulerAdapter:
|
||||
"""
|
||||
return self.blocks_in_chunk
|
||||
|
||||
def _cleanup_lookup_result(self, request_id: str) -> None:
    """
    Drop the lookup future stored for a finished request.

    Removes the entry keyed by ``request_id`` from ``self.lookup_futures``
    so that entries for finished requests do not accumulate (memory leak).
    An ID with no stored entry is ignored, so calling this more than once
    for the same request is harmless.

    Args:
        request_id: The ID of the finished request.
    """
    try:
        del self.lookup_futures[request_id]
    except KeyError:
        # No future was registered under this ID; nothing to release.
        pass
|
||||
|
||||
# Helper functions
|
||||
def _create_key(self, block_hash: bytes) -> IPCCacheEngineKey:
|
||||
"""Convert a block hash to an IPC cache engine key"""
|
||||
|
||||
@ -701,6 +701,8 @@ class LMCacheMPConnector(KVConnectorBase_V1):
|
||||
if condition
|
||||
else LMCacheMPRequestState.READY
|
||||
)
|
||||
# Clean up lookup future in scheduler adapter
|
||||
self.scheduler_adapter._cleanup_lookup_result(request.request_id)
|
||||
|
||||
def build_connector_meta(
|
||||
self, scheduler_output: SchedulerOutput
|
||||
@ -754,6 +756,8 @@ class LMCacheMPConnector(KVConnectorBase_V1):
|
||||
Optional KVTransferParams to be included in the request outputs
|
||||
returned by the engine.
|
||||
"""
|
||||
# Clean up request tracker to prevent memory leak
|
||||
self._cleanup_request_tracker(request.request_id)
|
||||
return True, None
|
||||
|
||||
def take_events(self) -> Iterable["KVCacheEvent"]:
|
||||
@ -915,3 +919,15 @@ class LMCacheMPConnector(KVConnectorBase_V1):
|
||||
new_tracker = LMCacheMPRequestTracker(request)
|
||||
self.request_trackers[request_id] = new_tracker
|
||||
return self.request_trackers[request_id]
|
||||
|
||||
def _cleanup_request_tracker(self, request_id: str) -> None:
    """
    Remove the request tracker stored for a request.

    Pops the entry keyed by ``request_id`` from ``self.request_trackers``
    and emits a debug log line when an entry was actually removed. This
    should be called when a request is finished to prevent memory leak.
    An ID with no stored tracker is ignored, so repeated calls are safe.

    Only the tracker mapping is modified here; this method does not touch
    any lookup future.

    Args:
        request_id: The ID of the finished request.
    """
    removed_tracker = self.request_trackers.pop(request_id, None)
    # Test for presence, not truthiness: a tracker object that happens to
    # evaluate as falsy was still removed and should still be reported.
    if removed_tracker is not None:
        logger.debug(
            "[KVConnector] Cleaned up request_tracker for request %s",
            request_id,
        )
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user