[Bugfix][CPU] Fix InputBatch for pooling models in the CPU v1 (#20014)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
Li, Jiang 2025-06-24 21:20:04 +08:00 committed by GitHub
parent 9a3b88328f
commit 53da4cd397
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 1 deletions

View File

@@ -101,4 +101,4 @@ def test_prm_models(
hf_output = torch.tensor(hf_output) hf_output = torch.tensor(hf_output)
vllm_output = torch.tensor(vllm_output) vllm_output = torch.tensor(vllm_output)
assert torch.allclose(hf_output, vllm_output, 1e-2) assert torch.allclose(hf_output, vllm_output, 1.5e-2)

View File

@@ -7,6 +7,7 @@ import torch
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.model_loader import get_model from vllm.model_executor.model_loader import get_model
from vllm.model_executor.models.interfaces import has_step_pooler
from vllm.v1.worker.gpu_model_runner import GPUModelRunner from vllm.v1.worker.gpu_model_runner import GPUModelRunner
logger = init_logger(__name__) logger = init_logger(__name__)
@@ -52,6 +53,9 @@ class CPUModelRunner(GPUModelRunner):
logger.info("Starting to load model %s...", self.model_config.model) logger.info("Starting to load model %s...", self.model_config.model)
self.model = get_model(vllm_config=self.vllm_config) self.model = get_model(vllm_config=self.vllm_config)
if has_step_pooler(self.model):
self.input_batch.logits_processing_needs_token_ids = True
if self.lora_config: if self.lora_config:
self.model = self.load_lora_model(self.model, self.model_config, self.model = self.load_lora_model(self.model, self.model_config,
self.scheduler_config, self.scheduler_config,