mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 20:35:01 +08:00
[Core] Early return in SlidingWindowManager.remove_skipped_blocks (#27673)
Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
parent
d3ab240f39
commit
0b51c9bd8b
@@ -394,7 +394,13 @@ class SlidingWindowManager(SingleTypeKVCacheManager):
|
||||
# skipped during the attention computation.
|
||||
last_useful_token = num_computed_tokens - self.sliding_window + 1
|
||||
last_useful_block = last_useful_token // self.block_size
|
||||
if last_useful_block <= 0:
|
||||
# Early return if tokens are not enough to fill the sliding window
|
||||
return
|
||||
blocks = self.req_to_blocks[request_id]
|
||||
if blocks[last_useful_block - 1] == self._null_block:
|
||||
# Early return if there are no blocks to remove
|
||||
return
|
||||
removed_blocks: list[KVCacheBlock] = []
|
||||
for i in range(last_useful_block - 1, -1, -1):
|
||||
if blocks[i] == self._null_block:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user