mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-28 10:37:13 +08:00
Improve multimodal hasher performance for re-used Image prompts (#22825)
Signed-off-by: Staszek Pasko <staszek@gmail.com>
This commit is contained in:
parent
49252cf59e
commit
22341b996e
@ -1,5 +1,6 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
@ -72,3 +73,22 @@ def test_hash_non_contiguous_array():
|
||||
hasher = MultiModalHasher
|
||||
# Both should be hashable and produce the same hashes
|
||||
assert hasher.hash_kwargs(data=arr) == hasher.hash_kwargs(data=arr_c)
|
||||
|
||||
|
||||
def test_hash_image_exif_id():
    """Check how the hasher treats the EXIF ImageID tag of an image.

    Two expectations are asserted:

    * an image whose ImageID tag holds a ``uuid.UUID`` hashes the same as
      the raw bytes of that UUID;
    * an image whose ImageID tag holds something else (here a plain string)
      hashes the same as an untagged copy of the same image file.
    """
    hasher = MultiModalHasher

    # First image: a UUID is stored in ImageID, so the hash is expected to
    # match the hash of the UUID bytes rather than of the pixel data.
    image_uuid = uuid.uuid4()
    image1 = Image.new("1", size=(10, 20))
    image1.getexif()[Image.ExifTags.Base.ImageID] = image_uuid
    assert hasher.hash_kwargs(image=image1) == hasher.hash_kwargs(
        image=image_uuid.bytes)

    # Second image: ImageID holds a non-UUID value, so the hash is expected
    # to match that of the same file opened without any tag. The context
    # managers make sure both file handles are released after the check.
    with Image.open(ASSETS_DIR / "image1.png") as image2, \
            Image.open(ASSETS_DIR / "image1.png") as image2a:
        image2.getexif()[Image.ExifTags.Base.ImageID] = "Not a UUID"
        assert hasher.hash_kwargs(image=image2) == hasher.hash_kwargs(
            image=image2a)
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pickle
|
||||
import uuid
|
||||
from collections.abc import Iterable, Mapping
|
||||
from typing import Union
|
||||
|
||||
@ -34,6 +35,11 @@ class MultiModalHasher:
|
||||
return np.array(obj).tobytes()
|
||||
|
||||
if isinstance(obj, Image.Image):
|
||||
exif = obj.getexif()
|
||||
if Image.ExifTags.Base.ImageID in exif and isinstance(
|
||||
exif[Image.ExifTags.Base.ImageID], uuid.UUID):
|
||||
# If the EXIF ImageID tag holds a uuid.UUID, hash its bytes instead of the pixel data
|
||||
return exif[Image.ExifTags.Base.ImageID].bytes
|
||||
return cls.item_to_bytes(
|
||||
"image", np.asarray(convert_image_mode(obj, "RGBA")))
|
||||
if isinstance(obj, torch.Tensor):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user