mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-18 03:47:04 +08:00
[BugFix] Reclaim resources to prevent memory leaks when using LMCacheMPConnector (#30745)
Signed-off-by: wz1qqx <ziqi.wang@novita.ai> Co-authored-by: wz1qqx <ziqi.wang@novita.ai>
This commit is contained in:
parent
058926d48c
commit
889f8bb250
@ -147,6 +147,14 @@ class LMCacheMPSchedulerAdapter:
|
|||||||
"""
|
"""
|
||||||
return self.blocks_in_chunk
|
return self.blocks_in_chunk
|
||||||
|
|
||||||
|
def _cleanup_lookup_result(self, request_id: str) -> None:
|
||||||
|
"""
|
||||||
|
Clean up lookup future for a finished request to prevent memory leak.
|
||||||
|
Args:
|
||||||
|
request_id: The ID of the finished request.
|
||||||
|
"""
|
||||||
|
self.lookup_futures.pop(request_id, None)
|
||||||
|
|
||||||
# Helper functions
|
# Helper functions
|
||||||
def _create_key(self, block_hash: bytes) -> IPCCacheEngineKey:
|
def _create_key(self, block_hash: bytes) -> IPCCacheEngineKey:
|
||||||
"""Convert a block hash to an IPC cache engine key"""
|
"""Convert a block hash to an IPC cache engine key"""
|
||||||
|
|||||||
@ -701,6 +701,8 @@ class LMCacheMPConnector(KVConnectorBase_V1):
|
|||||||
if condition
|
if condition
|
||||||
else LMCacheMPRequestState.READY
|
else LMCacheMPRequestState.READY
|
||||||
)
|
)
|
||||||
|
# Clean up lookup future in scheduler adapter
|
||||||
|
self.scheduler_adapter._cleanup_lookup_result(request.request_id)
|
||||||
|
|
||||||
def build_connector_meta(
|
def build_connector_meta(
|
||||||
self, scheduler_output: SchedulerOutput
|
self, scheduler_output: SchedulerOutput
|
||||||
@ -754,6 +756,8 @@ class LMCacheMPConnector(KVConnectorBase_V1):
|
|||||||
Optional KVTransferParams to be included in the request outputs
|
Optional KVTransferParams to be included in the request outputs
|
||||||
returned by the engine.
|
returned by the engine.
|
||||||
"""
|
"""
|
||||||
|
# Clean up request tracker to prevent memory leak
|
||||||
|
self._cleanup_request_tracker(request.request_id)
|
||||||
return True, None
|
return True, None
|
||||||
|
|
||||||
def take_events(self) -> Iterable["KVCacheEvent"]:
|
def take_events(self) -> Iterable["KVCacheEvent"]:
|
||||||
@ -915,3 +919,15 @@ class LMCacheMPConnector(KVConnectorBase_V1):
|
|||||||
new_tracker = LMCacheMPRequestTracker(request)
|
new_tracker = LMCacheMPRequestTracker(request)
|
||||||
self.request_trackers[request_id] = new_tracker
|
self.request_trackers[request_id] = new_tracker
|
||||||
return self.request_trackers[request_id]
|
return self.request_trackers[request_id]
|
||||||
|
|
||||||
|
def _cleanup_request_tracker(self, request_id: str) -> None:
    """
    Remove the request tracker stored for a request.

    This should be called when a request is finished to prevent memory leak.
    Only ``self.request_trackers`` is modified here; the lookup future is
    not handled by this method. A request ID with no tracker is ignored.

    Args:
        request_id: The ID of the request whose tracker should be dropped.
    """
    # Compare against None rather than relying on truthiness, so the debug
    # line is emitted for every tracker that was actually removed.
    removed_tracker = self.request_trackers.pop(request_id, None)
    if removed_tracker is not None:
        logger.debug(
            "[KVConnector] Cleaned up request_tracker for request %s",
            request_id,
        )
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user