diff --git a/tests/models/multimodal/pooling/test_jinavl_reranker.py b/tests/models/multimodal/pooling/test_jinavl_reranker.py
index 83d6ab8e4039..50c91f1f81ca 100644
--- a/tests/models/multimodal/pooling/test_jinavl_reranker.py
+++ b/tests/models/multimodal/pooling/test_jinavl_reranker.py
@@ -1,9 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Union
 
 import pytest
 from transformers import AutoModel
 
+from vllm.entrypoints.chat_utils import ChatCompletionContentPartImageParam
+from vllm.entrypoints.score_utils import ScoreMultiModalParam
+
+from ....conftest import HfRunner, VllmRunner
+
 model_name = "jinaai/jina-reranker-m0"
 
 mm_processor_kwargs = {
@@ -14,73 +20,90 @@ mm_processor_kwargs = {
 limit_mm_per_prompt = {"image": 2}
 
 
-def vllm_reranker(model_name,
-                  query,
-                  documents,
-                  query_type="text",
-                  doc_type="text"):
-    from vllm import LLM
+def vllm_reranker(
+    vllm_runner: type[VllmRunner],
+    model_name: str,
+    dtype: str,
+    query_strs: list[str],
+    document_strs: list[str],
+    query_type: str = "text",
+    doc_type: str = "text",
+):
 
-    model = LLM(
-        model=model_name,
-        task="score",
-        max_model_len=32768,
-        mm_processor_kwargs=mm_processor_kwargs,
-        limit_mm_per_prompt=limit_mm_per_prompt,
-    )
-
-    def create_image_param(url: str):
+    def create_image_param(url: str) -> ChatCompletionContentPartImageParam:
         return {"type": "image_url", "image_url": {"url": f"{url}"}}
 
-    if query_type == "image":
-        query = {"content": [create_image_param(url) for url in query]}
+    query: Union[list[str], ScoreMultiModalParam]
+    if query_type == "text":
+        query = query_strs
+    elif query_type == "image":
+        query = ScoreMultiModalParam(
+            content=[create_image_param(url) for url in query_strs])
 
-    if doc_type == "image":
-        documents = {"content": [create_image_param(url) for url in documents]}
+    documents: Union[list[str], ScoreMultiModalParam]
+    if doc_type == "text":
+        documents = document_strs
+    elif doc_type == "image":
+        documents = ScoreMultiModalParam(
+            content=[create_image_param(url) for url in document_strs])
 
-    outputs = model.score(query, documents)
+    with vllm_runner(
+            model_name,
+            task="score",
+            dtype=dtype,
+            max_num_seqs=2,
+            max_model_len=2048,
+            mm_processor_kwargs=mm_processor_kwargs,
+            limit_mm_per_prompt=limit_mm_per_prompt,
+    ) as vllm_model:
+        outputs = vllm_model.model.score(query, documents)
 
     return [output.outputs.score for output in outputs]
 
 
-def hf_reranker(model_name,
-                query,
-                documents,
-                query_type="text",
-                doc_type="text"):
-
+def hf_reranker(
+    hf_runner: type[HfRunner],
+    model_name: str,
+    dtype: str,
+    query_strs: list[str],
+    document_strs: list[str],
+    query_type: str = "text",
+    doc_type: str = "text",
+):
     checkpoint_to_hf_mapper = {
         "visual.": "model.visual.",
         "model.": "model.language_model.",
     }
 
-    model = AutoModel.from_pretrained(
-        model_name,
-        torch_dtype="auto",
-        trust_remote_code=True,
-        key_mapping=checkpoint_to_hf_mapper).to("cuda").eval()
+    data_pairs = [[query_strs[0], d] for d in document_strs]
 
-    data_pairs = [[query[0], d] for d in documents]
-
-    scores = model.compute_score(data_pairs,
-                                 max_length=2048,
-                                 query_type=query_type,
-                                 doc_type=doc_type)
-    return scores
+    with hf_runner(
+            model_name,
+            dtype=dtype,
+            trust_remote_code=True,
+            auto_cls=AutoModel,
+            model_kwargs={"key_mapping": checkpoint_to_hf_mapper},
+    ) as hf_model:
+        return hf_model.model.compute_score(data_pairs,
+                                            max_length=2048,
+                                            query_type=query_type,
+                                            doc_type=doc_type)
 
 
 # Visual Documents Reranking
 @pytest.mark.parametrize("model_name", [model_name])
-def test_model_text_image(model_name):
-
+@pytest.mark.parametrize("dtype", ["half"])
+def test_model_text_image(hf_runner, vllm_runner, model_name, dtype):
     query = ["slm markdown"]
     documents = [
         "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/handelsblatt-preview.png",
         "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png",
     ]
 
-    hf_outputs = hf_reranker(model_name, query, documents, "text", "image")
-    vllm_outputs = vllm_reranker(model_name, query, documents, "text", "image")
+    hf_outputs = hf_reranker(hf_runner, model_name, dtype, query, documents,
+                             "text", "image")
+    vllm_outputs = vllm_reranker(vllm_runner, model_name, dtype, query,
+                                 documents, "text", "image")
 
     assert hf_outputs[0] == pytest.approx(vllm_outputs[0], rel=0.02)
     assert hf_outputs[1] == pytest.approx(vllm_outputs[1], rel=0.02)
@@ -88,8 +111,8 @@ def test_model_text_image(model_name):
 
 # Textual Documents Reranking
 @pytest.mark.parametrize("model_name", [model_name])
-def test_model_text_text(model_name):
-
+@pytest.mark.parametrize("dtype", ["half"])
+def test_model_text_text(hf_runner, vllm_runner, model_name, dtype):
     query = ["slm markdown"]
     documents = [
         """We present ReaderLM-v2, a compact 1.5 billion parameter language model designed for efficient
@@ -104,9 +127,10 @@ def test_model_text_text(model_name):
         lower computational requirements.""",  # noqa: E501
         "数据提取么?为什么不用正则啊,你用正则不就全解决了么?",
     ]
-
-    hf_outputs = hf_reranker(model_name, query, documents, "text", "text")
-    vllm_outputs = vllm_reranker(model_name, query, documents, "text", "text")
+    hf_outputs = hf_reranker(hf_runner, model_name, dtype, query, documents,
+                             "text", "text")
+    vllm_outputs = vllm_reranker(vllm_runner, model_name, dtype, query,
+                                 documents, "text", "text")
 
     assert hf_outputs[0] == pytest.approx(vllm_outputs[0], rel=0.02)
     assert hf_outputs[1] == pytest.approx(vllm_outputs[1], rel=0.02)
@@ -114,8 +138,8 @@ def test_model_text_text(model_name):
 
 # Image Querying for Textual Documents
 @pytest.mark.parametrize("model_name", [model_name])
-def test_model_image_text(model_name):
-
+@pytest.mark.parametrize("dtype", ["half"])
+def test_model_image_text(hf_runner, vllm_runner, model_name, dtype):
     query = [
         "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
     ]
@@ -133,8 +157,10 @@ def test_model_image_text(model_name):
         "数据提取么?为什么不用正则啊,你用正则不就全解决了么?",
     ]
 
-    hf_outputs = hf_reranker(model_name, query, documents, "image", "text")
-    vllm_outputs = vllm_reranker(model_name, query, documents, "image", "text")
+    hf_outputs = hf_reranker(hf_runner, model_name, dtype, query, documents,
+                             "image", "text")
+    vllm_outputs = vllm_reranker(vllm_runner, model_name, dtype, query,
+                                 documents, "image", "text")
 
     assert hf_outputs[0] == pytest.approx(vllm_outputs[0], rel=0.02)
     assert hf_outputs[1] == pytest.approx(vllm_outputs[1], rel=0.02)
@@ -142,8 +168,8 @@ def test_model_image_text(model_name):
 
 # Image Querying for Image Documents
 @pytest.mark.parametrize("model_name", [model_name])
-def test_model_image_image(model_name):
-
+@pytest.mark.parametrize("dtype", ["half"])
+def test_model_image_image(hf_runner, vllm_runner, model_name, dtype):
    query = [
        "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
    ]
@@ -152,9 +178,10 @@ def test_model_image_image(model_name):
         "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/handelsblatt-preview.png",
         "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png",
     ]
-    hf_outputs = hf_reranker(model_name, query, documents, "image", "image")
-    vllm_outputs = vllm_reranker(model_name, query, documents, "image",
-                                 "image")
+    hf_outputs = hf_reranker(hf_runner, model_name, dtype, query, documents,
+                             "image", "image")
+    vllm_outputs = vllm_reranker(vllm_runner, model_name, dtype, query,
+                                 documents, "image", "image")
 
     assert hf_outputs[0] == pytest.approx(vllm_outputs[0], rel=0.02)
     assert hf_outputs[1] == pytest.approx(vllm_outputs[1], rel=0.02)
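For context, a minimal usage sketch of the multimodal scoring path these tests exercise, outside the test harness. It is not part of the patch: it uses the offline LLM entrypoint, the ScoreMultiModalParam import, and the score()/outputs.score accessors that appear in the diff above; the document strings are paraphrased placeholders.

# Hedged sketch, not part of this patch: image query scored against text
# documents via vLLM's offline entrypoint, mirroring what vllm_reranker() does.
from vllm import LLM
from vllm.entrypoints.score_utils import ScoreMultiModalParam

llm = LLM(
    model="jinaai/jina-reranker-m0",
    task="score",
    max_model_len=2048,
    limit_mm_per_prompt={"image": 2},
)

# Image query built as the same image_url content part the tests use.
query = ScoreMultiModalParam(content=[{
    "type": "image_url",
    "image_url": {
        "url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
    },
}])

# Placeholder text documents (assumed examples, not the test strings).
documents = [
    "ReaderLM-v2 is a compact model for HTML-to-Markdown conversion.",
    "Why not just use regular expressions for data extraction?",
]

outputs = llm.score(query, documents)
scores = [output.outputs.score for output in outputs]  # one score per document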