From 53da4cd397821195e1b8bc481ada9f83a2007d05 Mon Sep 17 00:00:00 2001 From: "Li, Jiang" Date: Tue, 24 Jun 2025 21:20:04 +0800 Subject: [PATCH] [Bugfix][CPU] Fix InputBatch for pooling models in the CPU v1 (#20014) Signed-off-by: jiang1.li --- tests/models/language/pooling/test_reward.py | 2 +- vllm/v1/worker/cpu_model_runner.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/models/language/pooling/test_reward.py b/tests/models/language/pooling/test_reward.py index 085cdca9f1f3..ec3d25ee22a9 100644 --- a/tests/models/language/pooling/test_reward.py +++ b/tests/models/language/pooling/test_reward.py @@ -101,4 +101,4 @@ def test_prm_models( hf_output = torch.tensor(hf_output) vllm_output = torch.tensor(vllm_output) - assert torch.allclose(hf_output, vllm_output, 1e-2) + assert torch.allclose(hf_output, vllm_output, 1.5e-2) diff --git a/vllm/v1/worker/cpu_model_runner.py b/vllm/v1/worker/cpu_model_runner.py index 6631c9636eac..370de9f11599 100644 --- a/vllm/v1/worker/cpu_model_runner.py +++ b/vllm/v1/worker/cpu_model_runner.py @@ -7,6 +7,7 @@ import torch from vllm.config import VllmConfig from vllm.logger import init_logger from vllm.model_executor.model_loader import get_model +from vllm.model_executor.models.interfaces import has_step_pooler from vllm.v1.worker.gpu_model_runner import GPUModelRunner logger = init_logger(__name__) @@ -52,6 +53,9 @@ class CPUModelRunner(GPUModelRunner): logger.info("Starting to load model %s...", self.model_config.model) self.model = get_model(vllm_config=self.vllm_config) + if has_step_pooler(self.model): + self.input_batch.logits_processing_needs_token_ids = True + if self.lora_config: self.model = self.load_lora_model(self.model, self.model_config, self.scheduler_config,