mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-09 04:27:02 +08:00
fix: copy actual sampled_token_ids to req_output_token_ids
Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
This commit is contained in:
parent
06579f9a82
commit
70189d8eb0
@@ -949,9 +949,11 @@ class InputBatch:
|
||||
if sampled_token_ids is None:
|
||||
assert self.async_copy_ready_event is not None
|
||||
self.async_copy_ready_event.synchronize()
|
||||
-sampled_token_ids = self.sampled_token_ids_cpu.squeeze(-1).tolist()
|
||||
+sampled_token_ids = self.sampled_token_ids_cpu.tolist()
|
||||
# Replace placeholder token id with actual sampled id.
|
||||
-req_output_token_ids[-1] = sampled_token_ids[prev_index]
|
||||
+req_output_token_ids[-len(sampled_token_ids[prev_index]) :] = (
|
||||
+sampled_token_ids[prev_index]
|
||||
+)
|
||||
|
||||
@property
|
||||
def num_reqs(self) -> int:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user