diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index 44cdd6f44aa9..6e915a9f6005 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -8,7 +8,8 @@ from collections import defaultdict from pathlib import PosixPath import pytest -from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq +from transformers import (AutoModelForImageTextToText, + AutoModelForTextToWaveform, AutoModelForVision2Seq) from vllm.platforms import current_platform from vllm.utils import identity @@ -140,7 +141,7 @@ VLM_TEST_SETTINGS = { marks=[pytest.mark.core_model, pytest.mark.cpu_model], ), "qwen2_5_omni": VLMTestInfo( - models=["Qwen/Qwen2.5-Omni-7B"], + models=["Qwen/Qwen2.5-Omni-3B"], test_type=( VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, @@ -151,8 +152,9 @@ VLM_TEST_SETTINGS = { video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>", # noqa: E501 max_model_len=4096, max_num_seqs=2, - auto_cls=AutoModelForVision2Seq, + auto_cls=AutoModelForTextToWaveform, vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output, + patch_hf_runner=model_utils.qwen2_5_omni_patch_hf_runner, image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], marks=[pytest.mark.core_model, pytest.mark.cpu_model], ), diff --git a/tests/models/multimodal/generation/vlm_utils/model_utils.py b/tests/models/multimodal/generation/vlm_utils/model_utils.py index aa9d3901fa36..f0f4ed989241 100644 --- a/tests/models/multimodal/generation/vlm_utils/model_utils.py +++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py @@ -706,3 +706,11 @@ def ovis2_patch_hf_runner(hf_model: HfRunner) -> HfRunner: hf_model.processor = processor return hf_model + + +def qwen2_5_omni_patch_hf_runner(hf_model: HfRunner) -> HfRunner: + """Patches and returns an instance of the HfRunner for Qwen2.5-Omni.""" + thinker = hf_model.model.thinker + thinker.get_output_embeddings = lambda: thinker.lm_head + hf_model.model = thinker + return hf_model diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py index 2b1d38dfda97..772a2db3e48a 100644 --- a/tests/models/multimodal/processing/test_common.py +++ b/tests/models/multimodal/processing/test_common.py @@ -284,7 +284,7 @@ def _test_processing_correctness_mistral( "Qwen/Qwen2-VL-2B-Instruct", "Qwen/Qwen2.5-VL-3B-Instruct", "Qwen/Qwen2-Audio-7B-Instruct", - "Qwen/Qwen2.5-Omni-7B", + "Qwen/Qwen2.5-Omni-3B", "Skywork/Skywork-R1V-38B", "fixie-ai/ultravox-v0_5-llama-3_2-1b", "openai/whisper-large-v3", diff --git a/tests/models/registry.py b/tests/models/registry.py index a19c43b698f1..cce2c82b3dc3 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -72,12 +72,15 @@ class _HfExamplesInfo: return current_version = TRANSFORMERS_VERSION + cur_base_version = Version(current_version).base_version min_version = self.min_transformers_version max_version = self.max_transformers_version msg = f"`transformers=={current_version}` installed, but `transformers" - if min_version and Version(current_version) < Version(min_version): + # Only check the base version for the min/max version, otherwise preview + # models cannot be run because `x.yy.0.dev0`<`x.yy.0` + if min_version and Version(cur_base_version) < Version(min_version): msg += f">={min_version}` is required to run this model." - elif max_version and Version(current_version) > Version(max_version): + elif max_version and Version(cur_base_version) > Version(max_version): msg += f"<={max_version}` is required to run this model." else: return @@ -362,8 +365,8 @@ _MULTIMODAL_EXAMPLE_MODELS = { "Qwen2AudioForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-Audio-7B-Instruct"), # noqa: E501 "Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"), # noqa: E501 "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct"), # noqa: E501 - "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B", # noqa: E501 - min_transformers_version="4.52"), # noqa: E501 + "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-3B", + min_transformers_version="4.52"), "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B"), "SmolVLMForConditionalGeneration": _HfExamplesInfo("HuggingFaceTB/SmolVLM2-2.2B-Instruct"), # noqa: E501 "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b", # noqa: E501