diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index d39aea1f2d11..430085d9c978 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -437,14 +437,24 @@ class Scheduler(SchedulerInterface): # The request cannot be scheduled. break + # Edge case: P/D Disaggregation combined with + # Spec Decoding. Request no lookahead tokens + # while the request has no computed tokens; + # otherwise an extra block gets allocated and + # local and remote block counts mismatch. + effective_lookahead_tokens = (0 if request.num_computed_tokens + == 0 else + self.num_lookahead_tokens) + new_blocks = self.kv_cache_manager.allocate_slots( request, num_new_tokens + num_external_computed_tokens, num_new_local_computed_tokens, new_computed_blocks, - num_lookahead_tokens=self.num_lookahead_tokens, + num_lookahead_tokens=effective_lookahead_tokens, delay_cache_blocks=load_kv_async, ) + if new_blocks is None: # The request cannot be scheduled. break