vllm/tests/models/language/pooling/test_mm_classifier_conversion.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.config.pooler import PoolerConfig
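

# Converts a multimodal generative model (Idefics3) into a classifier via
# convert="classify". load_format="dummy" uses randomly initialized weights,
# so the test can only check output shape, not prediction quality.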
def test_idefics_multimodal(
    vllm_runner,
) -> None:
    prompts = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ]

    with vllm_runner(
        model_name="HuggingFaceM4/Idefics3-8B-Llama3",
        runner="pooling",
        task="classify",
        convert="classify",
        load_format="dummy",
        max_model_len=512,
        enforce_eager=True,
        tensor_parallel_size=1,
        disable_log_stats=True,
        dtype="bfloat16",
    ) as vllm_model:
        llm = vllm_model.get_llm()
        outputs = llm.classify(prompts)

        for output in outputs:
            # The converted head defaults to two classes (HF configs carry
            # two labels unless id2label is overridden).
            assert len(output.outputs.probs) == 2
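

# hf_overrides hook for the Gemma test below: swaps in the sequence
# classification architecture and builds a 5-way head from the answer-letter
# tokens A-E, mapped to furniture labels.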
def update_config(config):
    config.text_config.update(
        {
            "architectures": ["Gemma3ForSequenceClassification"],
            "classifier_from_token": ["A", "B", "C", "D", "E"],
            "method": "no_post_processing",
            "id2label": {
                "A": "Chair",
                "B": "Couch",
                "C": "Table",
                "D": "Bed",
                "E": "Cupboard",
            },
        }
    )
    return config
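

# End-to-end conversion test: a generative Gemma 3 checkpoint is turned into a
# classifier and asked to categorize a product from an image plus a short
# text description.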
def test_gemma_multimodal(
    vllm_runner,
) -> None:
    messages = [
        {
            "role": "system",
            "content": """
            You are a helpful assistant. You will be given a product description
            which may also include an image. Classify the following product into
            one of the categories:
            A = chair
            B = couch
            C = table
            D = bed
            E = cupboard
            You'll answer with exactly one letter (A, B, C, D, or E).""",
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/red_chair.jpg"
                    },
                },
                {"type": "text", "text": "A fine 19th century piece of furniture."},
            ],
        },
    ]

    with vllm_runner(
        model_name="google/gemma-3-4b-it",
        runner="pooling",
        task="classify",
        convert="classify",
        load_format="auto",
        hf_overrides=update_config,
        pooler_config=PoolerConfig(pooling_type="LAST"),
        max_model_len=512,
        enforce_eager=True,
        tensor_parallel_size=1,
        disable_log_stats=True,
        dtype="bfloat16",
    ) as vllm_model:
        llm = vllm_model.get_llm()
        prompts = llm.preprocess_chat(messages)
        result = llm.classify(prompts)

        # The image is a red chair, so class "A" (Chair) should receive almost
        # all of the probability mass.
        assert result[0].outputs.probs[0] > 0.95
        assert all(c < 0.05 for c in result[0].outputs.probs[1:])
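

# Both tests rely on pytest's fixture injection for ``vllm_runner``. A minimal
# invocation (assuming a GPU machine with access to the checkpoints):
#
#   pytest tests/models/language/pooling/test_mm_classifier_conversion.py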