From 53d7c39271aeb0568afcae337396a972e1848586 Mon Sep 17 00:00:00 2001 From: Aviad Rossmann Date: Fri, 1 Aug 2025 09:23:18 +0300 Subject: [PATCH] Update sampling_metadata.py (#21937) Signed-off-by: Aviad Rossmann --- vllm/model_executor/sampling_metadata.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py index 56f0f0984bfa0..66bcf1c4bfe50 100644 --- a/vllm/model_executor/sampling_metadata.py +++ b/vllm/model_executor/sampling_metadata.py @@ -539,37 +539,37 @@ class SamplingTensors: temperatures_t = torch.tensor( temperatures, device="cpu", - dtype=dtype, + dtype=torch.float32, pin_memory=pin_memory, ) top_ps_t = torch.tensor( top_ps, device="cpu", - dtype=dtype, + dtype=torch.float32, pin_memory=pin_memory, ) min_ps_t = torch.tensor( min_ps, device="cpu", - dtype=dtype, + dtype=torch.float32, pin_memory=pin_memory, ) presence_penalties_t = torch.tensor( presence_penalties, device="cpu", - dtype=dtype, + dtype=torch.float32, pin_memory=pin_memory, ) frequency_penalties_t = torch.tensor( frequency_penalties, device="cpu", - dtype=dtype, + dtype=torch.float32, pin_memory=pin_memory, ) repetition_penalties_t = torch.tensor( repetition_penalties, device="cpu", - dtype=dtype, + dtype=torch.float32, pin_memory=pin_memory, ) top_ks_t = torch.tensor(