From 0b51c9bd8b19cee3a494b0f966a6b0a846a40193 Mon Sep 17 00:00:00 2001
From: Jialin Ouyang <Jialin.Ouyang@gmail.com>
Date: Tue, 28 Oct 2025 18:32:33 -0700
Subject: [PATCH] [Core] Early return in
 SlidingWindowManager.remove_skipped_blocks (#27673)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
---
 vllm/v1/core/single_type_kv_cache_manager.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/v1/core/single_type_kv_cache_manager.py b/vllm/v1/core/single_type_kv_cache_manager.py
index 6699fb9818cb..575ae3d7d83b 100644
--- a/vllm/v1/core/single_type_kv_cache_manager.py
+++ b/vllm/v1/core/single_type_kv_cache_manager.py
@@ -394,7 +394,13 @@ class SlidingWindowManager(SingleTypeKVCacheManager):
         # skipped during the attention computation.
         last_useful_token = num_computed_tokens - self.sliding_window + 1
         last_useful_block = last_useful_token // self.block_size
+        if last_useful_block <= 0:
+            # Early return if no block lies entirely before the sliding window
+            return
         blocks = self.req_to_blocks[request_id]
+        if blocks[last_useful_block - 1] == self._null_block:
+            # Early return if there are no blocks to remove
+            return
         removed_blocks: list[KVCacheBlock] = []
         for i in range(last_useful_block - 1, -1, -1):
             if blocks[i] == self._null_block: