fix: copy actual sampled_token_ids to req_output_token_ids

Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
This commit is contained in:
zhuhaoran 2025-12-05 15:51:43 +08:00
parent 06579f9a82
commit 70189d8eb0

View File

@@ -949,9 +949,11 @@ class InputBatch:
if sampled_token_ids is None:
assert self.async_copy_ready_event is not None
self.async_copy_ready_event.synchronize()
sampled_token_ids = self.sampled_token_ids_cpu.squeeze(-1).tolist()
sampled_token_ids = self.sampled_token_ids_cpu.tolist()
# Replace placeholder token id with actual sampled id.
req_output_token_ids[-1] = sampled_token_ids[prev_index]
req_output_token_ids[-len(sampled_token_ids[prev_index]) :] = (
sampled_token_ids[prev_index]
)
@property
def num_reqs(self) -> int: