[Bugfix] Fix broken Qwen2.5-omni tests (#17613)

Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py 2025-05-04 01:08:14 +08:00 committed by GitHub
parent 887d7af882
commit f66f1e0fa3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 21 additions and 8 deletions

View File

@ -8,7 +8,8 @@ from collections import defaultdict
from pathlib import PosixPath
import pytest
from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq from transformers import (AutoModelForImageTextToText,
AutoModelForTextToWaveform, AutoModelForVision2Seq)
from vllm.platforms import current_platform
from vllm.utils import identity
@ -140,7 +141,7 @@ VLM_TEST_SETTINGS = {
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
),
"qwen2_5_omni": VLMTestInfo(
models=["Qwen/Qwen2.5-Omni-7B"], models=["Qwen/Qwen2.5-Omni-3B"],
test_type=(
VLMTestType.IMAGE,
VLMTestType.MULTI_IMAGE,
@ -151,8 +152,9 @@ VLM_TEST_SETTINGS = {
video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>", # noqa: E501
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForVision2Seq, auto_cls=AutoModelForTextToWaveform,
vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
patch_hf_runner=model_utils.qwen2_5_omni_patch_hf_runner,
image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
),

View File

@ -706,3 +706,11 @@ def ovis2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
hf_model.processor = processor
return hf_model
def qwen2_5_omni_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
    """Patch an HfRunner so Qwen2.5-Omni is driven via its thinker submodel.

    The full Omni model wraps a text "thinker" (plus audio "talker"); for
    text-output comparison tests only the thinker is needed, so it replaces
    the wrapper as the runner's model.
    """
    thinker_module = hf_model.model.thinker
    # The thinker does not expose get_output_embeddings; alias it to lm_head
    # so HF generation utilities can locate the output projection.
    thinker_module.get_output_embeddings = lambda: thinker_module.lm_head
    hf_model.model = thinker_module
    return hf_model

View File

@ -284,7 +284,7 @@ def _test_processing_correctness_mistral(
"Qwen/Qwen2-VL-2B-Instruct",
"Qwen/Qwen2.5-VL-3B-Instruct",
"Qwen/Qwen2-Audio-7B-Instruct",
"Qwen/Qwen2.5-Omni-7B", "Qwen/Qwen2.5-Omni-3B",
"Skywork/Skywork-R1V-38B",
"fixie-ai/ultravox-v0_5-llama-3_2-1b",
"openai/whisper-large-v3",

View File

@ -72,12 +72,15 @@ class _HfExamplesInfo:
return
current_version = TRANSFORMERS_VERSION
cur_base_version = Version(current_version).base_version
min_version = self.min_transformers_version
max_version = self.max_transformers_version
msg = f"`transformers=={current_version}` installed, but `transformers"
if min_version and Version(current_version) < Version(min_version): # Only check the base version for the min/max version, otherwise preview
# models cannot be run because `x.yy.0.dev0`<`x.yy.0`
if min_version and Version(cur_base_version) < Version(min_version):
msg += f">={min_version}` is required to run this model."
elif max_version and Version(current_version) > Version(max_version): elif max_version and Version(cur_base_version) > Version(max_version):
msg += f"<={max_version}` is required to run this model."
else:
return
@ -362,8 +365,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"Qwen2AudioForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-Audio-7B-Instruct"), # noqa: E501
"Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"), # noqa: E501
"Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct"), # noqa: E501
"Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B", # noqa: E501 "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-3B",
min_transformers_version="4.52"), # noqa: E501 min_transformers_version="4.52"),
"SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B"),
"SmolVLMForConditionalGeneration": _HfExamplesInfo("HuggingFaceTB/SmolVLM2-2.2B-Instruct"), # noqa: E501
"UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b", # noqa: E501