From be48360c1fb9284804f9e1cae23b58e23e762877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=87=83?= Date: Tue, 20 May 2025 21:59:48 +0800 Subject: [PATCH] [Bugfix] Fix MRoPE Errors in the Qwen-VL Model When Processing Pure Text (#18407) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: 松灵 --- vllm/worker/model_runner.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 12025617e5127..15f40bcef8969 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -729,8 +729,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]): mm_kwargs, placeholder_maps = MultiModalPlaceholderMap.from_seq_group( seq_group_metadata, range(positions[0], positions[0] + len(positions))) - if not mm_kwargs: - return inter_data.multi_modal_kwargs = mm_kwargs inter_data.multi_modal_placeholder_maps = placeholder_maps @@ -741,12 +739,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]): video_grid_thw = mm_kwargs.get("video_grid_thw", None) audio_feature_lengths = mm_kwargs.get("audio_feature_lengths", None) - assert ( - image_grid_thw is not None or video_grid_thw is not None - or audio_feature_lengths is not None), ( - "mrope embedding type requires multi-modal input mapper " - "returns 'image_grid_thw' or 'video_grid_thw' or " - "'audio_feature_lengths'.") second_per_grid_ts = mm_kwargs.get("second_per_grid_ts", None) use_audio_in_video = mm_kwargs.get("use_audio_in_video", False)