From 067da2d1df141363f0ad65939049709b2dbd5080 Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Tue, 7 Oct 2025 20:32:37 -0700
Subject: [PATCH] [Core] Simplify setting new_token_ids in CachedRequestData
 (#26388)

Signed-off-by: Nick Hill
---
 vllm/v1/core/sched/scheduler.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
index 09f1e91a9e858..fd9b9855ac9c9 100644
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -712,7 +712,6 @@ class Scheduler(SchedulerInterface):
         num_computed_tokens: list[int] = []
         num_output_tokens: list[int] = []
 
-        use_connector = self.connector is not None
         for req in itertools.chain(running_reqs, resumed_reqs):
             req_id = req.request_id
             req_ids.append(req_id)
@@ -729,16 +728,11 @@ class Scheduler(SchedulerInterface):
                     req.num_computed_tokens : req.num_computed_tokens + num_tokens
                 ]
                 new_token_ids.append(token_ids)
-            elif use_connector:
-                # When using a KVConnector, we add a placeholder to avoid index
-                # out of bounds errors. TODO: Remove this once the KVConnector
-                # is updated to handle token IDs properly.
-                new_token_ids.append([])
             new_block_ids.append(
                 req_to_new_blocks[req_id].get_block_ids(allow_none=True)
             )
             num_computed_tokens.append(req.num_computed_tokens)
-            num_output_tokens.append(len(req.output_token_ids))
+            num_output_tokens.append(req.num_output_tokens)
         # Because resumed_reqs is usually empty, it is more efficient to do
         # in-place appending so that we don't need to allocate a new list.
         resumed_from_preemption = [False] * len(running_reqs)