mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 02:05:01 +08:00
[Bugfix][CPU] Fix InputBatch for pooling models in the CPU v1 (#20014)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
parent
9a3b88328f
commit
53da4cd397
@ -101,4 +101,4 @@ def test_prm_models(
|
|||||||
hf_output = torch.tensor(hf_output)
|
hf_output = torch.tensor(hf_output)
|
||||||
vllm_output = torch.tensor(vllm_output)
|
vllm_output = torch.tensor(vllm_output)
|
||||||
|
|
||||||
assert torch.allclose(hf_output, vllm_output, 1e-2)
|
assert torch.allclose(hf_output, vllm_output, 1.5e-2)
|
||||||
|
|||||||
@ -7,6 +7,7 @@ import torch
|
|||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.model_executor.model_loader import get_model
|
from vllm.model_executor.model_loader import get_model
|
||||||
|
from vllm.model_executor.models.interfaces import has_step_pooler
|
||||||
from vllm.v1.worker.gpu_model_runner import GPUModelRunner
|
from vllm.v1.worker.gpu_model_runner import GPUModelRunner
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
@ -52,6 +53,9 @@ class CPUModelRunner(GPUModelRunner):
|
|||||||
logger.info("Starting to load model %s...", self.model_config.model)
|
logger.info("Starting to load model %s...", self.model_config.model)
|
||||||
self.model = get_model(vllm_config=self.vllm_config)
|
self.model = get_model(vllm_config=self.vllm_config)
|
||||||
|
|
||||||
|
if has_step_pooler(self.model):
|
||||||
|
self.input_batch.logits_processing_needs_token_ids = True
|
||||||
|
|
||||||
if self.lora_config:
|
if self.lora_config:
|
||||||
self.model = self.load_lora_model(self.model, self.model_config,
|
self.model = self.load_lora_model(self.model, self.model_config,
|
||||||
self.scheduler_config,
|
self.scheduler_config,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user