mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 01:15:26 +08:00
[Bugfix] Fix MRoPE Errors in the Qwen-VL Model When Processing Pure Text (#18526)
Co-authored-by: 松灵 <wpf272043@alibaba-inc.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
fa72f9a812
commit
f6037d1907
@ -729,7 +729,10 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
|
|||||||
mm_kwargs, placeholder_maps = MultiModalPlaceholderMap.from_seq_group(
|
mm_kwargs, placeholder_maps = MultiModalPlaceholderMap.from_seq_group(
|
||||||
seq_group_metadata,
|
seq_group_metadata,
|
||||||
range(positions[0], positions[0] + len(positions)))
|
range(positions[0], positions[0] + len(positions)))
|
||||||
if not mm_kwargs:
|
|
||||||
|
# M-RoPE requires mrope_positions even for plain text; return early
|
||||||
|
# when mm_kwargs is empty only if inter_data.is_prompt is False.
|
||||||
|
if not mm_kwargs and not inter_data.is_prompt:
|
||||||
return
|
return
|
||||||
|
|
||||||
inter_data.multi_modal_kwargs = mm_kwargs
|
inter_data.multi_modal_kwargs = mm_kwargs
|
||||||
@ -741,12 +744,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
|
|||||||
video_grid_thw = mm_kwargs.get("video_grid_thw", None)
|
video_grid_thw = mm_kwargs.get("video_grid_thw", None)
|
||||||
audio_feature_lengths = mm_kwargs.get("audio_feature_lengths",
|
audio_feature_lengths = mm_kwargs.get("audio_feature_lengths",
|
||||||
None)
|
None)
|
||||||
assert (
|
|
||||||
image_grid_thw is not None or video_grid_thw is not None
|
|
||||||
or audio_feature_lengths is not None), (
|
|
||||||
"mrope embedding type requires multi-modal input mapper "
|
|
||||||
"returns 'image_grid_thw' or 'video_grid_thw' or "
|
|
||||||
"'audio_feature_lengths'.")
|
|
||||||
|
|
||||||
second_per_grid_ts = mm_kwargs.get("second_per_grid_ts", None)
|
second_per_grid_ts = mm_kwargs.get("second_per_grid_ts", None)
|
||||||
use_audio_in_video = mm_kwargs.get("use_audio_in_video", False)
|
use_audio_in_video = mm_kwargs.get("use_audio_in_video", False)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user