diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index 09f1e91a9e85..fd9b9855ac9c 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -712,7 +712,6 @@ class Scheduler(SchedulerInterface): num_computed_tokens: list[int] = [] num_output_tokens: list[int] = [] - use_connector = self.connector is not None for req in itertools.chain(running_reqs, resumed_reqs): req_id = req.request_id req_ids.append(req_id) @@ -729,16 +728,11 @@ class Scheduler(SchedulerInterface): req.num_computed_tokens : req.num_computed_tokens + num_tokens ] new_token_ids.append(token_ids) - elif use_connector: - # When using a KVConnector, we add a placeholder to avoid index - # out of bounds errors. TODO: Remove this once the KVConnector - # is updated to handle token IDs properly. - new_token_ids.append([]) new_block_ids.append( req_to_new_blocks[req_id].get_block_ids(allow_none=True) ) num_computed_tokens.append(req.num_computed_tokens) - num_output_tokens.append(len(req.output_token_ids)) + num_output_tokens.append(req.num_output_tokens) # Because resumed_reqs is usually empty, it is more efficient to do # in-place appending so that we don't need to allocate a new list. resumed_from_preemption = [False] * len(running_reqs)