mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:44:57 +08:00
[v1][P/D] Fix an edge case in kv cache schedule (#19182)
Co-authored-by: jinghui <jinghui@fb.com>
This commit is contained in:
parent
91a2ef98ea
commit
90b78ec5f9
@ -1009,6 +1009,8 @@ class Scheduler(SchedulerInterface):
|
||||
# Now that the blocks are ready, actually cache them.
|
||||
block_ids = self.kv_cache_manager.get_block_ids(request.request_id)[0]
|
||||
num_computed_tokens = len(block_ids) * self.block_size
|
||||
# Handle the case where the number of request tokens is less than one block.
|
||||
num_computed_tokens = min(num_computed_tokens, request.num_tokens)
|
||||
if num_computed_tokens == request.num_tokens:
|
||||
num_computed_tokens -= 1
|
||||
self.kv_cache_manager.cache_blocks(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user