# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
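"""Tests for out-of-tree (OOT) model registration via vLLM's plugin system.

Each test runs in its own process (``create_new_process_for_each_test``) so
that the plugin state selected through the ``VLLM_PLUGINS`` environment
variable cannot leak between tests.
"""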

import pytest

from vllm import LLM, SamplingParams
from vllm.assets.image import ImageAsset
from vllm.multimodal.image import convert_image_mode

from ..utils import create_new_process_for_each_test


@create_new_process_for_each_test()
def test_plugin(
    monkeypatch: pytest.MonkeyPatch,
    dummy_opt_path: str,
):
    with monkeypatch.context() as m:
        # With all plugins disabled, the dummy model's architecture is not
        # registered, so constructing the LLM must fail.
        m.setenv("VLLM_PLUGINS", "")

        with pytest.raises(ValueError, match="are not supported for now"):
            LLM(model=dummy_opt_path, load_format="dummy")
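

# For context, ``register_dummy_model`` is the name of the test plugin enabled
# via ``VLLM_PLUGINS`` in the tests below. A minimal sketch of what such a
# plugin might look like, assuming it is exposed through the
# ``vllm.general_plugins`` entry point group (the module and class names here
# are illustrative, not the actual test plugin):
#
#     from vllm import ModelRegistry
#
#     def register() -> None:
#         # Map the dummy architecture name to its implementation class.
#         ModelRegistry.register_model(
#             "MyOPTForCausalLM", "vllm_add_dummy_model.my_opt:MyOPTForCausalLM"
#         )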


@create_new_process_for_each_test()
def test_oot_registration_text_generation(
    monkeypatch: pytest.MonkeyPatch,
    dummy_opt_path: str,
):
    with monkeypatch.context() as m:
        m.setenv("VLLM_PLUGINS", "register_dummy_model")
        prompts = ["Hello, my name is", "The text does not matter"]
        sampling_params = SamplingParams(temperature=0)
        llm = LLM(model=dummy_opt_path, load_format="dummy")
        first_token = llm.get_tokenizer().decode(0)
        outputs = llm.generate(prompts, sampling_params)

        for output in outputs:
            generated_text = output.outputs[0].text
            # make sure only the first token is generated
            rest = generated_text.replace(first_token, "")
            assert rest == ""


@create_new_process_for_each_test()
def test_oot_registration_embedding(
    monkeypatch: pytest.MonkeyPatch,
    dummy_gemma2_embedding_path: str,
):
    with monkeypatch.context() as m:
        m.setenv("VLLM_PLUGINS", "register_dummy_model")
        prompts = ["Hello, my name is", "The text does not matter"]
        llm = LLM(
            model=dummy_gemma2_embedding_path, load_format="dummy", max_model_len=2048
        )
        outputs = llm.embed(prompts)

        for output in outputs:
            # the dummy embedding model emits all-zero vectors
            assert all(v == 0 for v in output.outputs.embedding)


# shared test image, loaded once at module import time
image = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB")


@create_new_process_for_each_test()
def test_oot_registration_multimodal(
    monkeypatch: pytest.MonkeyPatch,
    dummy_llava_path: str,
):
    with monkeypatch.context() as m:
        m.setenv("VLLM_PLUGINS", "register_dummy_model")
        prompts = [
            {
                "prompt": "What's in the image?<image>",
                "multi_modal_data": {"image": image},
            },
            {
                "prompt": "Describe the image<image>",
                "multi_modal_data": {"image": image},
            },
        ]
        sampling_params = SamplingParams(temperature=0)
        llm = LLM(
            model=dummy_llava_path,
            load_format="dummy",
            max_num_seqs=1,
            trust_remote_code=True,
            gpu_memory_utilization=0.98,
            max_model_len=4096,
            enforce_eager=True,
            limit_mm_per_prompt={"image": 1},
        )
        first_token = llm.get_tokenizer().decode(0)
        outputs = llm.generate(prompts, sampling_params)

        for output in outputs:
            generated_text = output.outputs[0].text
            # make sure only the first token is generated
            rest = generated_text.replace(first_token, "")
            assert rest == ""