From 6071e989df1531b59ef35568f83f7351afb0b51e Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sun, 25 May 2025 18:33:35 +0100 Subject: [PATCH] [Core][Multimodal] Convert PIL Image to array without data copy when hashing (#18682) Signed-off-by: Lukas Geiger --- vllm/multimodal/hasher.py | 4 ++-- vllm/multimodal/video.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/multimodal/hasher.py b/vllm/multimodal/hasher.py index a5a4dcd0b6e1..b4cd6a90834c 100644 --- a/vllm/multimodal/hasher.py +++ b/vllm/multimodal/hasher.py @@ -36,8 +36,8 @@ class MultiModalHasher: return np.array(obj).tobytes() if isinstance(obj, Image.Image): - return cls.item_to_bytes("image", - np.array(convert_image_mode(obj, "RGBA"))) + return cls.item_to_bytes( + "image", np.asarray(convert_image_mode(obj, "RGBA"))) if isinstance(obj, torch.Tensor): return cls.item_to_bytes("tensor", obj.numpy()) if isinstance(obj, np.ndarray): diff --git a/vllm/multimodal/video.py b/vllm/multimodal/video.py index 3685fd4c3458..261d56abad9c 100644 --- a/vllm/multimodal/video.py +++ b/vllm/multimodal/video.py @@ -164,7 +164,7 @@ class VideoMediaIO(MediaIO[npt.NDArray]): ) return np.stack([ - np.array(load_frame(frame_data)) + np.asarray(load_frame(frame_data)) for frame_data in data.split(",") ])