From 8d339e86e5ed4328e8be9497a933ff85a2706d20 Mon Sep 17 00:00:00 2001
From: zhuhaoran
Date: Tue, 23 Dec 2025 17:52:38 +0800
Subject: [PATCH] fix corner case for update_async_output_token_ids

Signed-off-by: zhuhaoran
---
 vllm/v1/worker/gpu_input_batch.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py
index b09e982e70137..50a421c4a6fec 100644
--- a/vllm/v1/worker/gpu_input_batch.py
+++ b/vllm/v1/worker/gpu_input_batch.py
@@ -942,18 +942,15 @@ class InputBatch:
                 sampled_token_ids = self.sampled_token_ids_cpu.tolist()
             # Replace placeholder token id(s) with actual sampled id(s).
             if sampled_ids := sampled_token_ids[prev_index]:
-                num_placeholders = 0
-                for t in reversed(req_output_token_ids):
+                num_replace = 0
+                for t in sampled_ids:
                     if t == -1:
-                        num_placeholders += 1
-                    else:
                         break
-                if num_placeholders == 0:
+                    num_replace += 1
+
+                if num_replace == 0:
                     continue
-                assert num_placeholders <= len(sampled_ids)
-                req_output_token_ids[-num_placeholders:] = sampled_ids[
-                    :num_placeholders
-                ]
+                req_output_token_ids[-num_replace:] = sampled_ids[:num_replace]
 
     def update_async_spec_token_ids(
         self,