def multi_image_multi_aspect_ratio_inputs(formatter: Callable[[str], str]):
    """Assemble the multi-image test case mixing image sizes and aspect ratios.

    Four base prompts are run through ``formatter`` and bundled with four
    image entries built from the first two image assets.

    Args:
        formatter: model-specific prompt formatter.

    Returns:
        A one-element list containing a single ``PromptWithMultiModalInput``
        that carries the formatted prompts and the image entries.
    """
    first_asset = IMAGE_ASSETS[0]
    second_asset = IMAGE_ASSETS[1]
    stop_sign = first_asset.pil_image
    cherry_blossom = second_asset.pil_image

    # Run every base prompt through the model-specific formatter.
    base_prompts = (
        "\nDescribe 2 images.",
        "\nDescribe 2 images.",
        "\nDescribe 4 images.",
        "\nWhat is the season?",
    )
    formatted_prompts = list(map(formatter, base_prompts))

    # Entry 1: the two assets as they are.
    unmodified_pair = [stop_sign, cherry_blossom]

    # Entry 2: the same picture at two different scales.
    scaled_pair = [rescale_image_size(stop_sign, 0.1), stop_sign]

    # Entry 3: four images covering different sizes and aspect ratios.
    quarter_stop_sign = rescale_image_size(stop_sign, 0.25)
    blossom_183x488 = cherry_blossom.resize((183, 488))
    blossom_488x183 = cherry_blossom.resize((488, 183))
    mixed_quad = [
        stop_sign,
        quarter_stop_sign,
        blossom_183x488,
        blossom_488x183,
    ]

    # Entry 4 is a bare image, not wrapped in a list.
    aspect_ratio_images = [
        unmodified_pair,
        scaled_pair,
        mixed_quad,
        cherry_blossom,
    ]

    multi_image_case = PromptWithMultiModalInput(
        prompts=formatted_prompts,
        image_data=aspect_ratio_images,
    )
    return [multi_image_case]