From 7655dc3e45e65f39eee9755cda5298e7319240f2 Mon Sep 17 00:00:00 2001
From: iAmir97 <71513472+iAmir97@users.noreply.github.com>
Date: Thu, 14 Aug 2025 18:04:18 +0700
Subject: [PATCH] [Bugfix] Add reset prefix cache for online serving (#22726)

Signed-off-by: iAmir97
Signed-off-by: iAmir97 <71513472+iAmir97@users.noreply.github.com>
Co-authored-by: iAmir97
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 vllm/engine/async_llm_engine.py | 1 +
 vllm/v1/engine/async_llm.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index b6ee4105340a1..73726eeab5fc7 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -1092,6 +1092,7 @@ class AsyncLLMEngine(EngineClient):
         self.engine.reset_prefix_cache(device)
 
     async def sleep(self, level: int = 1) -> None:
+        await self.reset_prefix_cache()
         self.engine.sleep(level)
 
     async def wake_up(self, tags: Optional[list[str]] = None) -> None:
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index a2706327914c5..edc2e235c3c3f 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -576,6 +576,7 @@ class AsyncLLM(EngineClient):
         await self.engine_core.reset_prefix_cache_async()
 
     async def sleep(self, level: int = 1) -> None:
+        await self.reset_prefix_cache()
         await self.engine_core.sleep_async(level)
 
     async def wake_up(self, tags: Optional[list[str]] = None) -> None: