diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index fe552db74e2f2..79ab482bd71df 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -635,6 +635,8 @@ class Scheduler(SchedulerInterface): token_ids = req.all_token_ids[req.num_computed_tokens:req. num_computed_tokens + num_tokens] new_token_ids.append(token_ids) + else: + new_token_ids.append([]) new_block_ids.append(req_to_new_block_ids[req_id]) num_computed_tokens.append(req.num_computed_tokens) # Because resumed_reqs is usually empty, it is more efficient to do