diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py
index 44cdd6f44aa9..6e915a9f6005 100644
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -8,7 +8,8 @@ from collections import defaultdict
 from pathlib import PosixPath
 
 import pytest
-from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq
+from transformers import (AutoModelForImageTextToText,
+                          AutoModelForTextToWaveform, AutoModelForVision2Seq)
 
 from vllm.platforms import current_platform
 from vllm.utils import identity
@@ -140,7 +141,7 @@ VLM_TEST_SETTINGS = {
         marks=[pytest.mark.core_model, pytest.mark.cpu_model],
     ),
     "qwen2_5_omni": VLMTestInfo(
-        models=["Qwen/Qwen2.5-Omni-7B"],
+        models=["Qwen/Qwen2.5-Omni-3B"],
         test_type=(
             VLMTestType.IMAGE,
             VLMTestType.MULTI_IMAGE,
@@ -151,8 +152,9 @@ VLM_TEST_SETTINGS = {
         video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>", # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
-        auto_cls=AutoModelForVision2Seq,
+        auto_cls=AutoModelForTextToWaveform,
         vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
+        patch_hf_runner=model_utils.qwen2_5_omni_patch_hf_runner,
         image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
         marks=[pytest.mark.core_model, pytest.mark.cpu_model],
     ),
diff --git a/tests/models/multimodal/generation/vlm_utils/model_utils.py b/tests/models/multimodal/generation/vlm_utils/model_utils.py
index aa9d3901fa36..f0f4ed989241 100644
--- a/tests/models/multimodal/generation/vlm_utils/model_utils.py
+++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py
@@ -706,3 +706,11 @@ def ovis2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
 
     hf_model.processor = processor
     return hf_model
+
+
+def qwen2_5_omni_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
+    """Patches and returns an instance of the HfRunner for Qwen2.5-Omni."""
+    thinker = hf_model.model.thinker
+    thinker.get_output_embeddings = lambda: thinker.lm_head
+    hf_model.model = thinker
+    return hf_model
diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py
index 2b1d38dfda97..772a2db3e48a 100644
--- a/tests/models/multimodal/processing/test_common.py
+++ b/tests/models/multimodal/processing/test_common.py
@@ -284,7 +284,7 @@ def _test_processing_correctness_mistral(
     "Qwen/Qwen2-VL-2B-Instruct",
     "Qwen/Qwen2.5-VL-3B-Instruct",
     "Qwen/Qwen2-Audio-7B-Instruct",
-    "Qwen/Qwen2.5-Omni-7B",
+    "Qwen/Qwen2.5-Omni-3B",
     "Skywork/Skywork-R1V-38B",
     "fixie-ai/ultravox-v0_5-llama-3_2-1b",
     "openai/whisper-large-v3",
diff --git a/tests/models/registry.py b/tests/models/registry.py
index a19c43b698f1..cce2c82b3dc3 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -72,12 +72,15 @@ class _HfExamplesInfo:
             return
 
         current_version = TRANSFORMERS_VERSION
+        cur_base_version = Version(current_version).base_version
         min_version = self.min_transformers_version
         max_version = self.max_transformers_version
         msg = f"`transformers=={current_version}` installed, but `transformers"
-        if min_version and Version(current_version) < Version(min_version):
+        # Only check the base version for the min/max version, otherwise preview
+        # models cannot be run because `x.yy.0.dev0`<`x.yy.0`
+        if min_version and Version(cur_base_version) < Version(min_version):
             msg += f">={min_version}` is required to run this model."
-        elif max_version and Version(current_version) > Version(max_version):
+        elif max_version and Version(cur_base_version) > Version(max_version):
             msg += f"<={max_version}` is required to run this model."
         else:
             return
@@ -362,8 +365,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
     "Qwen2AudioForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-Audio-7B-Instruct"),  # noqa: E501
     "Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"),  # noqa: E501
     "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct"),  # noqa: E501
-    "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B",  # noqa: E501
-                                                                  min_transformers_version="4.52"),  # noqa: E501
+    "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-3B",
+                                        min_transformers_version="4.52"),
     "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B"),
     "SmolVLMForConditionalGeneration": _HfExamplesInfo("HuggingFaceTB/SmolVLM2-2.2B-Instruct"),  # noqa: E501
     "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b",  # noqa: E501