From 5124f5bf51b83e6f344c1bc6652e8c4d81313b34 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sat, 19 Apr 2025 02:37:02 -0700 Subject: [PATCH] [Model] Qwen2.5-Omni Cleanup (#16872) --- docs/source/models/supported_models.md | 4 ++-- vllm/model_executor/models/qwen2_5_omni_thinker.py | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/source/models/supported_models.md b/docs/source/models/supported_models.md index 1b80c801d5be5..331f18db817d1 100644 --- a/docs/source/models/supported_models.md +++ b/docs/source/models/supported_models.md @@ -1117,8 +1117,8 @@ Our PaliGemma implementations have the same problem as Gemma 3 (see above) for b ::: :::{note} -To use Qwen2.5-Omni, you have to install a fork of Hugging Face Transformers library from source via -`pip install git+https://github.com/BakerBunker/transformers.git@qwen25omni`. +To use Qwen2.5-Omni, you have to install Hugging Face Transformers library from source via +`pip install git+https://github.com/huggingface/transformers.git`. Read audio from video pre-processing is currently supported on V0 (but not V1), because overlapping modalities is not yet supported in V1. `--mm-processor-kwargs '{"use_audio_in_video": True}'`. diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py index 517d6eb7d6d0e..5b0693623ed9e 100644 --- a/vllm/model_executor/models/qwen2_5_omni_thinker.py +++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py @@ -518,9 +518,6 @@ class Qwen2_5OmniThinkerMultiModalProcessor( """ Qwen2.5-Omni reimplements this function to handle text only. """ - print(prompt) - print(hf_processor_mm_kwargs) - print(mm_items) if isinstance(prompt, str): if enable_hf_prompt_update: return self._apply_hf_processor_text_mm(