# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
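
"""Tests for CLIP embedding models: compare vLLM's pooling runner against the
reference HuggingFace CLIPModel on text-only and image-only inputs, and check
that a mixed text+image request is rejected cleanly."""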

import pytest
from transformers import CLIPModel

from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
from ...utils import check_embeddings_close

HF_TEXT_PROMPTS = [
    "a photo of a stop sign",
    "a photo of a cherry blossom",
]
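
# The prompts are intentionally empty: in the image test, each request
# embeds the image by itself, with no accompanying text.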
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts(
    {
        "stop_sign": "",
        "cherry_blossom": "",
    }
)

MODELS = ["openai/clip-vit-base-patch32"]


def _run_test(
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    input_texts: list[str],
    input_images: PromptImageInput,
    model: str,
    *,
    dtype: str,
) -> None:
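    """Embed the same inputs with vLLM and with HF, then check that the
    two sets of embeddings are numerically close."""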
    # NOTE: Ordering matters: run vLLM first, then HF.
    # vLLM needs a fresh process in which CUDA has not been initialized;
    # if HF runs first, CUDA is initialized in this process, which breaks
    # the multiprocessing backend with the fork start method (the default).
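    # CLIP's text encoder has a fixed context length of 77 tokens,
    # hence max_model_len=77.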
    with vllm_runner(
        model, runner="pooling", dtype=dtype, enforce_eager=True, max_model_len=77
    ) as vllm_model:
        vllm_outputs = vllm_model.embed(input_texts, images=input_images)

    with hf_runner(model, dtype=dtype, auto_cls=CLIPModel) as hf_model:
        all_inputs = hf_model.get_inputs(input_texts, images=input_images)

        all_outputs = []
        for inputs in all_inputs:
            inputs = hf_model.wrap_device(inputs)
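
            # Image inputs carry "pixel_values", so they go through the
            # vision tower; text-only inputs go through the text tower.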
if "pixel_values" in inputs:
|
|
pooled_output = hf_model.model.get_image_features(
|
|
pixel_values=inputs.pixel_values,
|
|
).squeeze(0)
|
|
else:
|
|
pooled_output = hf_model.model.get_text_features(
|
|
input_ids=inputs.input_ids,
|
|
attention_mask=inputs.attention_mask,
|
|
).squeeze(0)
|
|
|
|
all_outputs.append(pooled_output.tolist())
|
|
|
|
hf_outputs = all_outputs
|
|
|
|
check_embeddings_close(
|
|
embeddings_0_lst=hf_outputs,
|
|
embeddings_1_lst=vllm_outputs,
|
|
name_0="hf",
|
|
name_1="vllm",
|
|
)
|
|
|
|
|
|


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float"])
def test_models_text(
    hf_runner,
    vllm_runner,
    image_assets,
    model: str,
    dtype: str,
) -> None:
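    """Text-only prompts: vLLM and HF embeddings should closely match."""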
    input_texts_images = [(text, None) for text in HF_TEXT_PROMPTS]
    input_texts = [text for text, _ in input_texts_images]
    input_images = [image for _, image in input_texts_images]

    _run_test(
        hf_runner,
        vllm_runner,
        input_texts,
        input_images,  # type: ignore
        model,
        dtype=dtype,
    )


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float"])
def test_models_image(
    hf_runner,
    vllm_runner,
    image_assets,
    model: str,
    dtype: str,
) -> None:
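    """Image-only inputs: vLLM and HF embeddings should closely match."""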
    input_texts_images = [
        (text, asset.pil_image) for text, asset in zip(HF_IMAGE_PROMPTS, image_assets)
    ]
    input_texts = [text for text, _ in input_texts_images]
    input_images = [image for _, image in input_texts_images]

    _run_test(
        hf_runner,
        vllm_runner,
        input_texts,
        input_images,
        model,
        dtype=dtype,
    )


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float"])
def test_models_text_image_no_crash(
    vllm_runner,
    image_assets,
    model: str,
    dtype: str,
) -> None:
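    """A single request with both text and an image is rejected with a
    ValueError, and the engine keeps serving subsequent requests."""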
    texts = [HF_TEXT_PROMPTS[0]]
    images = [image_assets[0].pil_image]

    with vllm_runner(
        model, runner="pooling", dtype=dtype, enforce_eager=True, max_model_len=77
    ) as vllm_model:
        with pytest.raises(ValueError, match="not both"):
            vllm_model.embed(texts, images=images)

        # Should still be able to run subsequent requests
        vllm_model.embed(texts)
        vllm_model.embed([""], images=images)