From b616f6a53dc0caaf1eeb4be785df76415dde8633 Mon Sep 17 00:00:00 2001 From: Chenheli Hua Date: Wed, 2 Jul 2025 20:10:39 -0700 Subject: [PATCH] [Misc] Small: Fix video loader return type annotations. (#20389) Signed-off-by: Chenheli Hua --- tests/multimodal/test_utils.py | 7 ++++--- vllm/multimodal/utils.py | 4 ++-- vllm/multimodal/video.py | 14 ++++++++------ 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/multimodal/test_utils.py b/tests/multimodal/test_utils.py index d927ae5cd0b27..b642e5c0ad47e 100644 --- a/tests/multimodal/test_utils.py +++ b/tests/multimodal/test_utils.py @@ -172,9 +172,10 @@ async def test_fetch_video_http(video_url: str, num_frames: int): "num_frames": num_frames, }}) - video_sync = connector.fetch_video(video_url) - video_async = await connector.fetch_video_async(video_url) - assert np.array_equal(video_sync[0], video_async[0]) + video_sync, metadata_sync = connector.fetch_video(video_url) + video_async, metadata_async = await connector.fetch_video_async(video_url) + assert np.array_equal(video_sync, video_async) + assert metadata_sync == metadata_async # Used for the next two tests related to `merge_and_sort_multimodal_metadata`. diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py index 2f2be59a1f42d..22e696141b84b 100644 --- a/vllm/multimodal/utils.py +++ b/vllm/multimodal/utils.py @@ -228,7 +228,7 @@ class MediaConnector: video_url: str, *, image_mode: str = "RGB", - ) -> npt.NDArray: + ) -> tuple[npt.NDArray, dict[str, Any]]: """ Load video from a HTTP or base64 data URL. """ @@ -248,7 +248,7 @@ class MediaConnector: video_url: str, *, image_mode: str = "RGB", - ) -> npt.NDArray: + ) -> tuple[npt.NDArray, dict[str, Any]]: """ Asynchronously load video from a HTTP or base64 data URL. diff --git a/vllm/multimodal/video.py b/vllm/multimodal/video.py index d9589068a203b..ef1380bdb614c 100644 --- a/vllm/multimodal/video.py +++ b/vllm/multimodal/video.py @@ -6,6 +6,7 @@ from abc import abstractmethod from functools import partial from io import BytesIO from pathlib import Path +from typing import Any import numpy as np import numpy.typing as npt @@ -57,7 +58,7 @@ class VideoLoader: def load_bytes(cls, data: bytes, num_frames: int = -1, - **kwargs) -> npt.NDArray: + **kwargs) -> tuple[npt.NDArray, dict[str, Any]]: raise NotImplementedError @@ -106,7 +107,7 @@ class OpenCVVideoBackend(VideoLoader): def load_bytes(cls, data: bytes, num_frames: int = -1, - **kwargs) -> npt.NDArray: + **kwargs) -> tuple[npt.NDArray, dict[str, Any]]: import cv2 backend = cls().get_cv2_video_api() @@ -179,12 +180,13 @@ class VideoMediaIO(MediaIO[npt.NDArray]): video_loader_backend = envs.VLLM_VIDEO_LOADER_BACKEND self.video_loader = VIDEO_LOADER_REGISTRY.load(video_loader_backend) - def load_bytes(self, data: bytes) -> npt.NDArray: + def load_bytes(self, data: bytes) -> tuple[npt.NDArray, dict[str, Any]]: return self.video_loader.load_bytes(data, num_frames=self.num_frames, **self.kwargs) - def load_base64(self, media_type: str, data: str) -> npt.NDArray: + def load_base64(self, media_type: str, + data: str) -> tuple[npt.NDArray, dict[str, Any]]: if media_type.lower() == "video/jpeg": load_frame = partial( self.image_io.load_base64, @@ -194,11 +196,11 @@ class VideoMediaIO(MediaIO[npt.NDArray]): return np.stack([ np.asarray(load_frame(frame_data)) for frame_data in data.split(",") - ]) + ]), {} return self.load_bytes(base64.b64decode(data)) - def load_file(self, filepath: Path) -> npt.NDArray: + def load_file(self, filepath: Path) -> tuple[npt.NDArray, dict[str, Any]]: with filepath.open("rb") as f: data = f.read()