From 22341b996e7b6361624dd2909df0d9b37ae6e41e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Staszek=20Pa=C5=9Bko?=
Date: Fri, 15 Aug 2025 14:32:56 +0200
Subject: [PATCH] Improve multimodal hasher performance for re-used Image prompts (#22825)

When a PIL image carries a uuid.UUID value in its EXIF ImageID tag, the
multimodal hasher now serializes the UUID bytes instead of converting the
image to RGBA and hashing the pixel data. Any other ImageID value (for
example a plain string) is ignored and the image is hashed by content as
before.

Signed-off-by: Staszek Pasko
---
 tests/multimodal/test_hasher.py | 20 ++++++++++++++++++++
 vllm/multimodal/hasher.py       |  6 ++++++
 2 files changed, 26 insertions(+)

diff --git a/tests/multimodal/test_hasher.py b/tests/multimodal/test_hasher.py
index 42cb40739dcc3..75a233c2567cb 100644
--- a/tests/multimodal/test_hasher.py
+++ b/tests/multimodal/test_hasher.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import uuid
 from pathlib import Path
 
 import numpy as np
@@ -72,3 +73,22 @@ def test_hash_non_contiguous_array():
     hasher = MultiModalHasher
     # Both should be hashable and produce the same hashes
     assert hasher.hash_kwargs(data=arr) == hasher.hash_kwargs(data=arr_c)
+
+
+def test_hash_image_exif_id():
+    # Test that EXIF ImageID tag can be used to store UUID
+    # and the hasher will use that instead of the image data.
+    image1 = Image.new("1", size=(10, 20))
+    image_id = uuid.uuid4()
+    image1.getexif()[Image.ExifTags.Base.ImageID] = image_id
+    image2 = Image.open(ASSETS_DIR / "image1.png")
+    image2.getexif()[Image.ExifTags.Base.ImageID] = "Not a UUID"
+    image2a = Image.open(ASSETS_DIR / "image1.png")
+
+    hasher = MultiModalHasher
+    # first image has UUID in ImageID, so it should hash to that UUID
+    assert hasher.hash_kwargs(image=image1) == hasher.hash_kwargs(
+        image=image_id.bytes)
+    # second image has non-UUID in ImageID, so it should hash to the image data
+    assert hasher.hash_kwargs(image=image2) == hasher.hash_kwargs(
+        image=image2a)
diff --git a/vllm/multimodal/hasher.py b/vllm/multimodal/hasher.py
index ac27bb66f7b51..c9ce1f0be5f88 100644
--- a/vllm/multimodal/hasher.py
+++ b/vllm/multimodal/hasher.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import pickle
+import uuid
 from collections.abc import Iterable, Mapping
 from typing import Union
 
@@ -34,6 +35,11 @@ class MultiModalHasher:
             return np.array(obj).tobytes()
 
         if isinstance(obj, Image.Image):
+            exif = obj.getexif()
+            image_id = exif.get(Image.ExifTags.Base.ImageID)
+            if isinstance(image_id, uuid.UUID):
+                # Caller tagged the image with a UUID: hash that, not pixels
+                return image_id.bytes
             return cls.item_to_bytes(
                 "image", np.asarray(convert_image_mode(obj, "RGBA")))
         if isinstance(obj, torch.Tensor):