mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-27 01:17:03 +08:00
fix
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
4c2a337e67
commit
b16e2d9602
@@ -174,13 +174,16 @@ class RequestState:
         num_computed_tokens: int,
         sampling_params: SamplingParams,
     ) -> None:
+        assert len(self.free_indices) > 0, "No free space in GPU worker states"
         req_idx = self.free_indices.pop()
         self.req_id_to_index[req_id] = req_idx
         self.index_to_req_id[req_idx] = req_id
 
-        self.num_prompt_tokens.np[req_idx] = len(prompt_token_ids)
+        prompt_len = len(prompt_token_ids)
+        self.num_prompt_tokens.np[req_idx] = prompt_len
+        self.num_tokens.np[req_idx] = prompt_len
+        self.token_ids.np[req_idx, :prompt_len] = prompt_token_ids
         self.num_computed_tokens.np[req_idx] = num_computed_tokens
-        self.append_token_ids(req_idx, prompt_token_ids)
 
         self.temperature.np[req_idx] = sampling_params.temperature
         if sampling_params.sampling_type == SamplingType.GREEDY:
Loading…
x
Reference in New Issue
Block a user