[mrope][Qwen2-VL] Fix edge case where getting index of image/video token can potentially throw in default vl mrope implementation. (#23895)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
Chenheli Hua 2025-08-28 23:29:13 -07:00 committed by GitHub
parent b4f9e9631c
commit 2d0afcc9dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -670,12 +670,18 @@ class MRotaryEmbedding(RotaryEmbedding):
image_index, video_index = 0, 0
for _ in range(image_nums + video_nums):
video_second_per_grid_t = 0.0
if image_token_id in input_tokens and remain_images > 0:
ed_image = input_tokens.index(image_token_id, st)
if remain_images > 0:
try:
ed_image = input_tokens.index(image_token_id, st)
except ValueError:
ed_image = len(input_tokens) + 1
else:
ed_image = len(input_tokens) + 1
if video_token_id in input_tokens and remain_videos > 0:
ed_video = input_tokens.index(video_token_id, st)
if remain_videos > 0:
try:
ed_video = input_tokens.index(video_token_id, st)
except ValueError:
ed_video = len(input_tokens) + 1
else:
ed_video = len(input_tokens) + 1
if ed_image < ed_video: