[Core] Early return in SlidingWindowManager.remove_skipped_blocks (#27673)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
2025-12-10 20:35:01 +08:00 · 2025-10-28 18:32:33 -07:00 · 2025-10-28 18:32:33 -07:00 · 0b51c9bd8b
commit 0b51c9bd8b
parent d3ab240f39
1 changed files with 6 additions and 0 deletions
--- a/vllm/v1/core/single_type_kv_cache_manager.py
+++ b/vllm/v1/core/single_type_kv_cache_manager.py
@@ -394,7 +394,13 @@ class SlidingWindowManager(SingleTypeKVCacheManager):
        # skipped during the attention computation.
        last_useful_token = num_computed_tokens - self.sliding_window + 1
        last_useful_block = last_useful_token // self.block_size
+        if last_useful_block <= 0:
+            # Early return if tokens are not enough to fill the sliding window
+            return
        blocks = self.req_to_blocks[request_id]
+        if blocks[last_useful_block - 1] == self._null_block:
+            # Early return if there are no blocks to remove
+            return
        removed_blocks: list[KVCacheBlock] = []
        for i in range(last_useful_block - 1, -1, -1):
            if blocks[i] == self._null_block: