mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-04 07:26:31 +08:00
[Bugfix] Prevent IndexError for cached requests when pipeline parallelism is disabled (#20486)
Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>
This commit is contained in:
parent
110df74332
commit
edd270bc78
@@ -635,6 +635,8 @@ class Scheduler(SchedulerInterface):
|
||||
token_ids = req.all_token_ids[req.num_computed_tokens:req.
|
||||
num_computed_tokens + num_tokens]
|
||||
new_token_ids.append(token_ids)
|
||||
else:
|
||||
new_token_ids.append([])
|
||||
new_block_ids.append(req_to_new_block_ids[req_id])
|
||||
num_computed_tokens.append(req.num_computed_tokens)
|
||||
# Because resumed_reqs is usually empty, it is more efficient to do
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user