mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 07:02:15 +08:00
fix: copy actual sampled_token_ids to req_output_token_ids
Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
This commit is contained in:
parent
06579f9a82
commit
70189d8eb0
@@ -949,9 +949,11 @@ class InputBatch:
|
|||||||
if sampled_token_ids is None:
|
if sampled_token_ids is None:
|
||||||
assert self.async_copy_ready_event is not None
|
assert self.async_copy_ready_event is not None
|
||||||
self.async_copy_ready_event.synchronize()
|
self.async_copy_ready_event.synchronize()
|
||||||
sampled_token_ids = self.sampled_token_ids_cpu.squeeze(-1).tolist()
|
sampled_token_ids = self.sampled_token_ids_cpu.tolist()
|
||||||
# Replace placeholder token id with actual sampled id.
|
# Replace placeholder token id with actual sampled id.
|
||||||
req_output_token_ids[-1] = sampled_token_ids[prev_index]
|
req_output_token_ids[-len(sampled_token_ids[prev_index]) :] = (
|
||||||
|
sampled_token_ids[prev_index]
|
||||||
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def num_reqs(self) -> int:
|
def num_reqs(self) -> int:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user