From 87c94bc87943818ad039d5c916df793fbd081e6a Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 1 Aug 2025 13:24:46 +0100 Subject: [PATCH] Revert "Update sampling_metadata.py (#21937)" (#22088) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/model_executor/sampling_metadata.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py index 66bcf1c4bfe50..56f0f0984bfa0 100644 --- a/vllm/model_executor/sampling_metadata.py +++ b/vllm/model_executor/sampling_metadata.py @@ -539,37 +539,37 @@ class SamplingTensors: temperatures_t = torch.tensor( temperatures, device="cpu", - dtype=torch.float32, + dtype=dtype, pin_memory=pin_memory, ) top_ps_t = torch.tensor( top_ps, device="cpu", - dtype=torch.float32, + dtype=dtype, pin_memory=pin_memory, ) min_ps_t = torch.tensor( min_ps, device="cpu", - dtype=torch.float32, + dtype=dtype, pin_memory=pin_memory, ) presence_penalties_t = torch.tensor( presence_penalties, device="cpu", - dtype=torch.float32, + dtype=dtype, pin_memory=pin_memory, ) frequency_penalties_t = torch.tensor( frequency_penalties, device="cpu", - dtype=torch.float32, + dtype=dtype, pin_memory=pin_memory, ) repetition_penalties_t = torch.tensor( repetition_penalties, device="cpu", - dtype=torch.float32, + dtype=dtype, pin_memory=pin_memory, ) top_ks_t = torch.tensor(