[v1][P/D] Fix an edge case in KV cache scheduling (#19182)

Co-authored-by: jinghui <jinghui@fb.com>
2026-01-29 00:17:14 +08:00 · 2025-06-05 23:32:55 -07:00 · 2025-06-05 23:32:55 -07:00 · 90b78ec5f9
commit 90b78ec5f9
parent 91a2ef98ea
1 changed files with 2 additions and 0 deletions
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -1009,6 +1009,8 @@ class Scheduler(SchedulerInterface):
        # Now that the blocks are ready, actually cache them.
        block_ids = self.kv_cache_manager.get_block_ids(request.request_id)[0]
        num_computed_tokens = len(block_ids) * self.block_size
+        # Handle the case where the request has fewer tokens than one block.
+        num_computed_tokens = min(num_computed_tokens, request.num_tokens)
        if num_computed_tokens == request.num_tokens:
            num_computed_tokens -= 1
        self.kv_cache_manager.cache_blocks(