From 8e9ffd37d6499c6855b04b2a4d8f5326b5e2f78f Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Fri, 14 Mar 2025 02:25:37 +0800
Subject: [PATCH] [Misc] Clean up processor tests (#14771)

Signed-off-by: DarkLight1337
---
 .../multimodal/processing/test_h2ovl.py      |  8 +++---
 .../multimodal/processing/test_idefics3.py   | 10 +++-----
 .../multimodal/processing/test_internvl.py   |  8 +++---
 .../multimodal/processing/test_llava_next.py |  9 +++----
 .../processing/test_llava_onevision.py       |  9 +++----
 .../multimodal/processing/test_phi3v.py      |  4 +--
 .../multimodal/processing/test_qwen2_vl.py   |  3 +--
 tests/models/utils.py                        | 25 ++++++++++---------
 8 files changed, 30 insertions(+), 46 deletions(-)

diff --git a/tests/models/multimodal/processing/test_h2ovl.py b/tests/models/multimodal/processing/test_h2ovl.py
index 84471c92a293..713fc733e21c 100644
--- a/tests/models/multimodal/processing/test_h2ovl.py
+++ b/tests/models/multimodal/processing/test_h2ovl.py
@@ -96,14 +96,14 @@ def _run_check(
     tokenizer = processor.info.get_tokenizer()
     config = processor.info.get_hf_config()
 
+    prompt = "<image>" * len(images)
     mm_data = {"image": images}
 
     total_expected_num_patches = sum(
         _get_expected_num_patches(config, image, len(images), min_num,
                                   max_num) for image in images)
 
-    processed_inputs = processor.apply("<image>" * len(images), mm_data,
-                                       mm_processor_kwargs)
+    processed_inputs = processor.apply(prompt, mm_data, mm_processor_kwargs)
 
     # Ensure we have the right number of placeholders per num_crops size
     image_token_id = tokenizer.convert_tokens_to_ids("<IMG_CONTEXT>")
@@ -152,9 +152,7 @@ def test_processor_override(
     }
 
     ctx = build_model_context(
-        model_name=model_id,
-        tokenizer_name=model_id,
-        trust_remote_code=True,
+        model_id,
         mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
         limit_mm_per_prompt={"image": len(size_factors)},
     )
diff --git a/tests/models/multimodal/processing/test_idefics3.py b/tests/models/multimodal/processing/test_idefics3.py
index 0a0f1cb38938..fdbe2f17692f 100644
--- a/tests/models/multimodal/processing/test_idefics3.py
+++ b/tests/models/multimodal/processing/test_idefics3.py
@@ -9,10 +9,8 @@ from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 from ....conftest import _ImageAssets
 from ...utils import build_model_context
 
-models = ["HuggingFaceM4/Idefics3-8B-Llama3"]
-
 
-@pytest.mark.parametrize("model", models)
+@pytest.mark.parametrize("model_id", ["HuggingFaceM4/Idefics3-8B-Llama3"])
 # yapf: disable
 @pytest.mark.parametrize(
     ("mm_processor_kwargs", "expected_toks_per_img"),
@@ -25,7 +23,7 @@ models = ["HuggingFaceM4/Idefics3-8B-Llama3"]
 @pytest.mark.parametrize("kwargs_on_init", [True, False])
 def test_processor_override(
     image_assets: _ImageAssets,
-    model: str,
+    model_id: str,
     mm_processor_kwargs: dict[str, object],
     expected_toks_per_img: int,
     num_imgs: int,
@@ -36,9 +34,7 @@ def test_processor_override(
     # in this test and assume that the kwargs will be correctly expanded by
     # the partial when calling the custom input processor.
     ctx = build_model_context(
-        model_name=model,
-        tokenizer_name=model,
-        trust_remote_code=True,
+        model_id,
         mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
         limit_mm_per_prompt={"image": num_imgs},
     )
diff --git a/tests/models/multimodal/processing/test_internvl.py b/tests/models/multimodal/processing/test_internvl.py
index adbc4f5b5586..f5bd661071ac 100644
--- a/tests/models/multimodal/processing/test_internvl.py
+++ b/tests/models/multimodal/processing/test_internvl.py
@@ -56,14 +56,14 @@ def _run_check(
     tokenizer = processor.info.get_tokenizer()
     config = processor.info.get_hf_config()
 
+    prompt = "<image>" * len(images)
     mm_data = {"image": images}
 
     total_expected_num_patches = sum(
         _get_expected_num_patches(config, image, len(images), min_num,
                                   max_num) for image in images)
 
-    processed_inputs = processor.apply("<image>" * len(images), mm_data,
-                                       mm_processor_kwargs)
+    processed_inputs = processor.apply(prompt, mm_data, mm_processor_kwargs)
 
     # Ensure we have the right number of placeholders per num_crops size
     image_token_id = tokenizer.convert_tokens_to_ids("<IMG_CONTEXT>")
@@ -109,9 +109,7 @@ def test_processor_override(
     }
 
     ctx = build_model_context(
-        model_name=model_id,
-        tokenizer_name=model_id,
-        trust_remote_code=True,
+        model_id,
         mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
         limit_mm_per_prompt={"image": len(size_factors)},
     )
diff --git a/tests/models/multimodal/processing/test_llava_next.py b/tests/models/multimodal/processing/test_llava_next.py
index dca25e5d4c4c..74bca0e35899 100644
--- a/tests/models/multimodal/processing/test_llava_next.py
+++ b/tests/models/multimodal/processing/test_llava_next.py
@@ -36,8 +36,7 @@ def _validate_image_max_tokens_one(
 @pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
 def test_processor_max_tokens(model_id):
     ctx = build_model_context(
-        model_name=model_id,
-        tokenizer_name=model_id,
+        model_id,
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": 1},
     )
@@ -136,8 +135,7 @@ def _test_image_prompt_replacements(
 @pytest.mark.parametrize("num_imgs", [1, 2])
 def test_processor_prompt_replacements_regression(model_id, num_imgs):
     ctx = build_model_context(
-        model_name=model_id,
-        tokenizer_name=model_id,
+        model_id,
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": num_imgs},
     )
@@ -166,8 +164,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
 @pytest.mark.parametrize("num_imgs", [1])
 def test_processor_prompt_replacements_all(model_id, num_imgs):
     ctx = build_model_context(
-        model_name=model_id,
-        tokenizer_name=model_id,
+        model_id,
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": num_imgs},
     )
diff --git a/tests/models/multimodal/processing/test_llava_onevision.py b/tests/models/multimodal/processing/test_llava_onevision.py
index 96abc840f052..c27898a40b71 100644
--- a/tests/models/multimodal/processing/test_llava_onevision.py
+++ b/tests/models/multimodal/processing/test_llava_onevision.py
@@ -37,8 +37,7 @@ def _validate_image_max_tokens_one(
                          ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
 def test_processor_max_tokens(model_id):
     ctx = build_model_context(
-        model_name=model_id,
-        tokenizer_name=model_id,
+        model_id,
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": 1},
     )
@@ -136,8 +135,7 @@ def _test_image_prompt_replacements(
 @pytest.mark.parametrize("num_imgs", [1, 2])
 def test_processor_prompt_replacements_regression(model_id, num_imgs):
     ctx = build_model_context(
-        model_name=model_id,
-        tokenizer_name=model_id,
+        model_id,
         mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs}, ) @@ -167,8 +165,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs): @pytest.mark.parametrize("num_imgs", [1]) def test_processor_prompt_replacements_all(model_id, num_imgs): ctx = build_model_context( - model_name=model_id, - tokenizer_name=model_id, + model_id, mm_processor_kwargs=None, limit_mm_per_prompt={"image": num_imgs}, ) diff --git a/tests/models/multimodal/processing/test_phi3v.py b/tests/models/multimodal/processing/test_phi3v.py index 420644f70842..2f0c8e7e5492 100644 --- a/tests/models/multimodal/processing/test_phi3v.py +++ b/tests/models/multimodal/processing/test_phi3v.py @@ -35,9 +35,7 @@ def test_processor_override( from vllm.model_executor.models.phi3v import _IMAGE_TOKEN_ID ctx = build_model_context( - model_name=model_id, - tokenizer_name=model_id, - trust_remote_code=True, + model_id, mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None, limit_mm_per_prompt={"image": num_imgs}, ) diff --git a/tests/models/multimodal/processing/test_qwen2_vl.py b/tests/models/multimodal/processing/test_qwen2_vl.py index b882528aafb9..95204c7ebb4d 100644 --- a/tests/models/multimodal/processing/test_qwen2_vl.py +++ b/tests/models/multimodal/processing/test_qwen2_vl.py @@ -30,8 +30,7 @@ def test_processor_override( ): """Ensure Qwen2VLMultiModalProcessor handles min/max pixels properly.""" ctx = build_model_context( - model_name=model_id, - tokenizer_name=model_id, + model_id, mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None, limit_mm_per_prompt={"image": num_imgs}, ) diff --git a/tests/models/utils.py b/tests/models/utils.py index b0182d545f4b..2280a6c916d9 100644 --- a/tests/models/utils.py +++ b/tests/models/utils.py @@ -10,6 +10,8 @@ from vllm.config import ModelConfig, TaskOption from vllm.inputs import InputContext from vllm.sequence import Logprob, PromptLogprobs, SampleLogprobs +from .registry import HF_EXAMPLE_MODELS + TokensText = tuple[list[int], str] @@ -250,10 +252,8 @@ def check_logprobs_close( def build_model_context( - model_name: str, + model_id: str, task: TaskOption = "auto", - tokenizer_name: Optional[str] = None, - trust_remote_code: bool = False, dtype: Optional[Union[str, torch.dtype]] = None, mm_processor_kwargs: Optional[dict] = None, limit_mm_per_prompt: Optional[dict] = None, @@ -262,9 +262,7 @@ def build_model_context( """Creates an InputContext for a given model. Args: - model_name: Name of the model being considered. - tokenizer_name: Name of the tokenizer being considered. - trust_remote_code: Whether or not to allow loading remote code. + model_id: ID of the model being considered. mm_processor_kwargs: optional processor kwargs for to be leveraged in the input processor, mapper, dummy data creation, etc. limit_mm_per_prompt: Multimodal limits. @@ -272,21 +270,24 @@ def build_model_context( Returns: InputContext for the model being considered. 
""" - if tokenizer_name is None: - tokenizer_name = model_name + model_info = HF_EXAMPLE_MODELS.find_hf_info(model_id) + model_info.check_available_online(on_fail="skip") + model_info.check_transformers_version(on_fail="skip") + if dtype is None: dtype = "half" model_config = ModelConfig( - model_name, + model_id, task=task, - tokenizer=tokenizer_name, - tokenizer_mode="auto", - trust_remote_code=trust_remote_code, + tokenizer=model_info.tokenizer or model_id, + tokenizer_mode=model_info.tokenizer_mode, + trust_remote_code=model_info.trust_remote_code, dtype=dtype, seed=0, mm_processor_kwargs=mm_processor_kwargs, limit_mm_per_prompt=limit_mm_per_prompt, disable_mm_preprocessor_cache=disable_mm_preprocessor_cache, + hf_overrides=model_info.hf_overrides, ) return InputContext(model_config)