mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-17 04:17:04 +08:00
top_p top_k
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
01bf16ede4
commit
cc340e26af
@ -249,14 +249,26 @@ class RequestState:
|
||||
batch_idx_to_req_idx: torch.Tensor,
|
||||
) -> SamplingMetadata:
|
||||
batch_size = batch_idx_to_req_idx.shape[0]
|
||||
if self.top_p_reqs:
|
||||
top_p_buffer = self.top_p.mirror_to_gpu()
|
||||
top_p = self.top_p.gpu
|
||||
else:
|
||||
top_p_buffer = self.top_p.gpu_buffer
|
||||
top_p = None
|
||||
if self.top_k_reqs:
|
||||
top_k_buffer = self.top_k.mirror_to_gpu()
|
||||
top_k = self.top_k.gpu
|
||||
else:
|
||||
top_k_buffer = self.top_k.gpu_buffer
|
||||
top_k = None
|
||||
# TODO(woosuk): Use UVA to optimize CPU -> GPU copy.
|
||||
_make_sampling_metadata_kernel[(batch_size, )](
|
||||
batch_idx_to_req_idx,
|
||||
self.temperature.mirror_to_gpu(),
|
||||
self.temperature.gpu,
|
||||
self.top_p.mirror_to_gpu(),
|
||||
top_p_buffer,
|
||||
self.top_p.gpu,
|
||||
self.top_k.mirror_to_gpu(),
|
||||
top_k_buffer,
|
||||
self.top_k.gpu,
|
||||
self.frequency_penalties.mirror_to_gpu(),
|
||||
self.frequency_penalties.gpu,
|
||||
@ -274,8 +286,8 @@ class RequestState:
|
||||
temperature=self.temperature.gpu[:batch_size],
|
||||
all_greedy=not self.random_reqs,
|
||||
all_random=not self.greedy_reqs,
|
||||
top_p=self.top_p.gpu[:batch_size],
|
||||
top_k=self.top_k.gpu[:batch_size],
|
||||
top_p=top_p,
|
||||
top_k=top_k,
|
||||
frequency_penalties=self.frequency_penalties.gpu[:batch_size],
|
||||
presence_penalties=self.presence_penalties.gpu[:batch_size],
|
||||
repetition_penalties=self.repetition_penalties.gpu[:batch_size],
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user