mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-24 07:37:53 +08:00
[Bugfix] Add reset prefix cache for online serving (#22726)
Signed-off-by: iAmir97 <Amir.balwel@embeddedllm.com>
Signed-off-by: iAmir97 <71513472+iAmir97@users.noreply.github.com>
Co-authored-by: iAmir97 <Amir.balwel@embeddedllm.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
parent
f4efda821d
commit
7655dc3e45
@@ -1092,6 +1092,7 @@ class AsyncLLMEngine(EngineClient):
|
|||||||
self.engine.reset_prefix_cache(device)
|
self.engine.reset_prefix_cache(device)
|
||||||
|
|
||||||
async def sleep(self, level: int = 1) -> None:
|
async def sleep(self, level: int = 1) -> None:
|
||||||
|
await self.reset_prefix_cache()
|
||||||
self.engine.sleep(level)
|
self.engine.sleep(level)
|
||||||
|
|
||||||
async def wake_up(self, tags: Optional[list[str]] = None) -> None:
|
async def wake_up(self, tags: Optional[list[str]] = None) -> None:
|
||||||
|
|||||||
@@ -576,6 +576,7 @@ class AsyncLLM(EngineClient):
|
|||||||
await self.engine_core.reset_prefix_cache_async()
|
await self.engine_core.reset_prefix_cache_async()
|
||||||
|
|
||||||
async def sleep(self, level: int = 1) -> None:
|
async def sleep(self, level: int = 1) -> None:
|
||||||
|
await self.reset_prefix_cache()
|
||||||
await self.engine_core.sleep_async(level)
|
await self.engine_core.sleep_async(level)
|
||||||
|
|
||||||
async def wake_up(self, tags: Optional[list[str]] = None) -> None:
|
async def wake_up(self, tags: Optional[list[str]] = None) -> None:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user