From 0b51c9bd8b19cee3a494b0f966a6b0a846a40193 Mon Sep 17 00:00:00 2001
From: Jialin Ouyang
Date: Tue, 28 Oct 2025 18:32:33 -0700
Subject: [PATCH] [Core] Early return in SlidingWindowManager.remove_skipped_blocks (#27673)

Signed-off-by: Jialin Ouyang
---
 vllm/v1/core/single_type_kv_cache_manager.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/v1/core/single_type_kv_cache_manager.py b/vllm/v1/core/single_type_kv_cache_manager.py
index 6699fb9818cb..575ae3d7d83b 100644
--- a/vllm/v1/core/single_type_kv_cache_manager.py
+++ b/vllm/v1/core/single_type_kv_cache_manager.py
@@ -394,7 +394,13 @@ class SlidingWindowManager(SingleTypeKVCacheManager):
         # skipped during the attention computation.
         last_useful_token = num_computed_tokens - self.sliding_window + 1
         last_useful_block = last_useful_token // self.block_size
+        if last_useful_block <= 0:
+            # Early return if tokens are not enough to fill the sliding window
+            return
         blocks = self.req_to_blocks[request_id]
+        if blocks[last_useful_block - 1] == self._null_block:
+            # Early return if there are no blocks to remove
+            return
         removed_blocks: list[KVCacheBlock] = []
         for i in range(last_useful_block - 1, -1, -1):
             if blocks[i] == self._null_block: