[Bugfix] Prevent IndexError for cached requests when pipeline parallelism is disabled (#20486)

Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>
This commit is contained in:
Peter Pan 2025-07-08 00:41:15 +08:00 committed by GitHub
parent 110df74332
commit edd270bc78
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -635,6 +635,8 @@ class Scheduler(SchedulerInterface):
token_ids = req.all_token_ids[req.num_computed_tokens:req.
num_computed_tokens + num_tokens]
new_token_ids.append(token_ids)
else:
new_token_ids.append([])
new_block_ids.append(req_to_new_block_ids[req_id])
num_computed_tokens.append(req.num_computed_tokens)
# Because resumed_reqs is usually empty, it is more efficient to do