[Core][Multimodal] Convert PIL Image to array without data copy when hashing (#18682)

Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
2025-12-13 06:04:59 +08:00 · 2025-05-25 18:33:35 +01:00 · 2025-05-25 18:33:35 +01:00 · 6071e989df
commit 6071e989df
parent 57fd13a707
2 changed files with 3 additions and 3 deletions
--- a/vllm/multimodal/hasher.py
+++ b/vllm/multimodal/hasher.py
@@ -36,8 +36,8 @@ class MultiModalHasher:
            return np.array(obj).tobytes()
        if isinstance(obj, Image.Image):
-            return cls.item_to_bytes("image",
+            return cls.item_to_bytes(
-                                     np.array(convert_image_mode(obj, "RGBA")))
+                "image", np.asarray(convert_image_mode(obj, "RGBA")))
        if isinstance(obj, torch.Tensor):
            return cls.item_to_bytes("tensor", obj.numpy())
        if isinstance(obj, np.ndarray):
--- a/vllm/multimodal/video.py
+++ b/vllm/multimodal/video.py
@@ -164,7 +164,7 @@ class VideoMediaIO(MediaIO[npt.NDArray]):
            )
            return np.stack([
-                np.array(load_frame(frame_data))
+                np.asarray(load_frame(frame_data))
                for frame_data in data.split(",")
            ])