mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-19 06:45:01 +08:00
[Core] Simplify setting new_token_ids in CachedRequestData (#26388)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
046118b938
commit
067da2d1df
@@ -712,7 +712,6 @@ class Scheduler(SchedulerInterface):
|
|||||||
num_computed_tokens: list[int] = []
|
num_computed_tokens: list[int] = []
|
||||||
num_output_tokens: list[int] = []
|
num_output_tokens: list[int] = []
|
||||||
|
|
||||||
use_connector = self.connector is not None
|
|
||||||
for req in itertools.chain(running_reqs, resumed_reqs):
|
for req in itertools.chain(running_reqs, resumed_reqs):
|
||||||
req_id = req.request_id
|
req_id = req.request_id
|
||||||
req_ids.append(req_id)
|
req_ids.append(req_id)
|
||||||
@@ -729,16 +728,11 @@ class Scheduler(SchedulerInterface):
|
|||||||
req.num_computed_tokens : req.num_computed_tokens + num_tokens
|
req.num_computed_tokens : req.num_computed_tokens + num_tokens
|
||||||
]
|
]
|
||||||
new_token_ids.append(token_ids)
|
new_token_ids.append(token_ids)
|
||||||
elif use_connector:
|
|
||||||
# When using a KVConnector, we add a placeholder to avoid index
|
|
||||||
# out of bounds errors. TODO: Remove this once the KVConnector
|
|
||||||
# is updated to handle token IDs properly.
|
|
||||||
new_token_ids.append([])
|
|
||||||
new_block_ids.append(
|
new_block_ids.append(
|
||||||
req_to_new_blocks[req_id].get_block_ids(allow_none=True)
|
req_to_new_blocks[req_id].get_block_ids(allow_none=True)
|
||||||
)
|
)
|
||||||
num_computed_tokens.append(req.num_computed_tokens)
|
num_computed_tokens.append(req.num_computed_tokens)
|
||||||
num_output_tokens.append(len(req.output_token_ids))
|
num_output_tokens.append(req.num_output_tokens)
|
||||||
# Because resumed_reqs is usually empty, it is more efficient to do
|
# Because resumed_reqs is usually empty, it is more efficient to do
|
||||||
# in-place appending so that we don't need to allocate a new list.
|
# in-place appending so that we don't need to allocate a new list.
|
||||||
resumed_from_preemption = [False] * len(running_reqs)
|
resumed_from_preemption = [False] * len(running_reqs)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user