From b16e2d9602178a7aeeeeb2c21096b0ddbd3ea44a Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Mon, 1 Sep 2025 02:10:48 -0700
Subject: [PATCH] fix

Signed-off-by: Woosuk Kwon
---
 vllm/v1/worker/gpu_worker_states.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/worker/gpu_worker_states.py b/vllm/v1/worker/gpu_worker_states.py
index a3ce3b2630fee..77323dbd739ec 100644
--- a/vllm/v1/worker/gpu_worker_states.py
+++ b/vllm/v1/worker/gpu_worker_states.py
@@ -174,13 +174,16 @@ class RequestState:
         num_computed_tokens: int,
         sampling_params: SamplingParams,
     ) -> None:
+        assert len(self.free_indices) > 0, "No free space in GPU worker states"
         req_idx = self.free_indices.pop()
         self.req_id_to_index[req_id] = req_idx
         self.index_to_req_id[req_idx] = req_id
-        self.num_prompt_tokens.np[req_idx] = len(prompt_token_ids)
+        prompt_len = len(prompt_token_ids)
+        self.num_prompt_tokens.np[req_idx] = prompt_len
+        self.num_tokens.np[req_idx] = prompt_len
+        self.token_ids.np[req_idx, :prompt_len] = prompt_token_ids
         self.num_computed_tokens.np[req_idx] = num_computed_tokens
-        self.append_token_ids(req_idx, prompt_token_ids)
         self.temperature.np[req_idx] = sampling_params.temperature
         if sampling_params.sampling_type == SamplingType.GREEDY: