From 5124f5bf51b83e6f344c1bc6652e8c4d81313b34 Mon Sep 17 00:00:00 2001
From: Roger Wang <hey@rogerw.me>
Date: Sat, 19 Apr 2025 02:37:02 -0700
Subject: [PATCH] [Model] Qwen2.5-Omni Cleanup  (#16872)

---
 docs/source/models/supported_models.md             | 4 ++--
 vllm/model_executor/models/qwen2_5_omni_thinker.py | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/docs/source/models/supported_models.md b/docs/source/models/supported_models.md
index 1b80c801d5be5..331f18db817d1 100644
--- a/docs/source/models/supported_models.md
+++ b/docs/source/models/supported_models.md
@@ -1117,8 +1117,8 @@ Our PaliGemma implementations have the same problem as Gemma 3 (see above) for b
 :::
 
 :::{note}
-To use Qwen2.5-Omni, you have to install a fork of Hugging Face Transformers library from source via
-`pip install git+https://github.com/BakerBunker/transformers.git@qwen25omni`.
+To use Qwen2.5-Omni, you have to install the Hugging Face Transformers library from source via
+`pip install git+https://github.com/huggingface/transformers.git`.
 
 Read audio from video pre-processing is currently supported on V0 (but not V1), because overlapping modalities is not yet supported in V1.
 `--mm-processor-kwargs '{"use_audio_in_video": True}'`.
diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py
index 517d6eb7d6d0e..5b0693623ed9e 100644
--- a/vllm/model_executor/models/qwen2_5_omni_thinker.py
+++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py
@@ -518,9 +518,6 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
         """
         Qwen2.5-Omni reimplements this function to handle text only.
         """
-        print(prompt)
-        print(hf_processor_mm_kwargs)
-        print(mm_items)
         if isinstance(prompt, str):
             if enable_hf_prompt_update:
                 return self._apply_hf_processor_text_mm(