Merge b9590323e284b13fe9c2a9e69f7cfb5b483f089e into 254f6b986720c92ddf97fbb1a6a6465da8e87e29

This commit is contained in:
ゆり 2025-12-25 00:06:52 +00:00 committed by GitHub
commit be87c0fed2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 218 additions and 5 deletions

View File

@ -93,3 +93,120 @@ def test_hash_image_exif_id():
assert hasher.hash_kwargs(image=image1) == hasher.hash_kwargs(image=id.bytes)
# second image has non-UUID in ImageID, so it should hash to the image data
assert hasher.hash_kwargs(image=image2) == hasher.hash_kwargs(image=image2a)
# Tests for FIPS 140-3 compliant hashing support
class TestFIPSHashing:
    """Tests for the SHA-256 (FIPS) and blake3 hashing back ends.

    Tests whose names end in ``_fips`` patch
    ``vllm.multimodal.hasher._USE_FIPS_HASHING`` to ``True`` so that they
    exercise the SHA-256 path regardless of how the test process happens
    to be configured.
    """

    # Characters allowed in a lowercase hexadecimal digest.
    _HEX_DIGITS = frozenset("0123456789abcdef")

    @staticmethod
    def _force_fips(monkeypatch) -> None:
        """Switch hasher selection to SHA-256 for the duration of one test."""
        import vllm.multimodal.hasher as hasher_module

        # _create_hasher() reads this module global on every call, so
        # patching it is enough; monkeypatch restores it after the test.
        monkeypatch.setattr(hasher_module, "_USE_FIPS_HASHING", True)

    def test_sha256_hasher_basic(self):
        """Test that _Sha256Hasher produces the standard SHA-256 digest."""
        import hashlib

        from vllm.multimodal.hasher import _Sha256Hasher

        hasher = _Sha256Hasher()
        hasher.update(b"test data")
        result = hasher.hexdigest()
        # SHA-256 produces 64-character hex digest
        assert len(result) == 64
        assert set(result) <= self._HEX_DIGITS
        # Pin the algorithm itself, not just the shape of the digest.
        assert result == hashlib.sha256(b"test data").hexdigest()

    def test_sha256_hasher_memoryview(self):
        """Test that _Sha256Hasher handles memoryview correctly."""
        from vllm.multimodal.hasher import _Sha256Hasher

        data = b"test data"
        mv = memoryview(data)
        hasher1 = _Sha256Hasher()
        hasher1.update(data)
        hasher2 = _Sha256Hasher()
        hasher2.update(mv)
        assert hasher1.hexdigest() == hasher2.hexdigest()

    def test_blake3_hasher_basic(self):
        """Test that _Blake3Hasher produces valid hashes when available."""
        from vllm.multimodal.hasher import _HAS_BLAKE3, _Blake3Hasher

        if not _HAS_BLAKE3:
            pytest.skip("blake3 not available")
        hasher = _Blake3Hasher()
        hasher.update(b"test data")
        result = hasher.hexdigest()
        # blake3 also produces 64-character hex digest by default
        assert len(result) == 64
        assert set(result) <= self._HEX_DIGITS

    def test_blake3_and_sha256_produce_different_hashes(self):
        """Test that blake3 and SHA-256 produce different hashes for same input."""
        from vllm.multimodal.hasher import _HAS_BLAKE3, _Blake3Hasher, _Sha256Hasher

        if not _HAS_BLAKE3:
            pytest.skip("blake3 not available")
        data = b"test data for hashing"
        blake3_hasher = _Blake3Hasher()
        blake3_hasher.update(data)
        sha256_hasher = _Sha256Hasher()
        sha256_hasher.update(data)
        # Different algorithms should produce different hashes
        assert blake3_hasher.hexdigest() != sha256_hasher.hexdigest()

    def test_create_hasher_returns_correct_type(self):
        """Test that _create_hasher returns appropriate hasher type."""
        from vllm.multimodal.hasher import (
            _USE_FIPS_HASHING,
            _Blake3Hasher,
            _create_hasher,
            _Sha256Hasher,
        )

        hasher = _create_hasher()
        if _USE_FIPS_HASHING:
            assert isinstance(hasher, _Sha256Hasher)
        else:
            assert isinstance(hasher, _Blake3Hasher)

    def test_hash_kwargs_consistency_with_fips(self, monkeypatch):
        """Test that hash_kwargs is deterministic when SHA-256 is forced."""
        self._force_fips(monkeypatch)
        data = {"key1": "value1", "key2": 42, "key3": b"bytes"}
        hash1 = MultiModalHasher.hash_kwargs(**data)
        hash2 = MultiModalHasher.hash_kwargs(**data)
        assert hash1 == hash2

    def test_hash_kwargs_with_image_fips(self, monkeypatch):
        """Test that image hashing works in FIPS mode."""
        self._force_fips(monkeypatch)
        image = Image.new("RGB", size=(10, 10), color=(255, 0, 0))
        # Should not raise an exception
        result = MultiModalHasher.hash_kwargs(image=image)
        assert isinstance(result, str)
        assert len(result) == 64

    def test_hash_kwargs_with_tensor_fips(self, monkeypatch):
        """Test that tensor hashing works in FIPS mode."""
        self._force_fips(monkeypatch)
        tensor = torch.zeros((5, 10, 20), dtype=torch.float32)
        # Should not raise an exception
        result = MultiModalHasher.hash_kwargs(data=tensor)
        assert isinstance(result, str)
        assert len(result) == 64

    def test_hash_kwargs_with_numpy_array_fips(self, monkeypatch):
        """Test that numpy array hashing works in FIPS mode."""
        self._force_fips(monkeypatch)
        arr = np.zeros((5, 10, 20))
        # Should not raise an exception
        result = MultiModalHasher.hash_kwargs(data=arr)
        assert isinstance(result, str)
        assert len(result) == 64

View File

@ -1,13 +1,29 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Multimodal content hashing utilities.
This module provides hashing functionality for multimodal content (images,
tensors, etc.) used in cache key generation. It supports both high-performance
blake3 hashing and FIPS 140-3 compliant SHA-256 hashing.
FIPS Compliance:
blake3 is not FIPS 140-3 approved. For environments requiring FIPS
compliance (government, healthcare, finance), set the environment
variable VLLM_USE_FIPS_HASHING=1 to use SHA-256 instead.
Environment Variables:
VLLM_USE_FIPS_HASHING: Set to "1", "true", or "yes" (case-insensitive)
to enable FIPS-compliant SHA-256 hashing instead of blake3. SHA-256 is
also used automatically when blake3 cannot be imported.
"""
import hashlib
import os
import pickle
import uuid
from collections.abc import Iterable
import numpy as np
import torch
from blake3 import blake3
from PIL import Image
from vllm.logger import init_logger
@ -16,16 +32,96 @@ from .base import MediaWithBytes
logger = init_logger(__name__)
# blake3 is optional - not FIPS 140-3 approved
# In FIPS-constrained environments, blake3 may not be available or allowed
try:
from blake3 import blake3 as _blake3
_HAS_BLAKE3 = True
except ImportError:
_blake3 = None
_HAS_BLAKE3 = False
def _use_fips_hashing() -> bool:
    """Decide whether SHA-256 should be used in place of blake3.

    SHA-256 is selected when either of the following holds:
    - the VLLM_USE_FIPS_HASHING environment variable is "1", "true" or
      "yes" (compared case-insensitively), or
    - blake3 could not be imported (automatic fallback).

    An info-level message is logged stating which of the two reasons
    applied; nothing is logged when blake3 is chosen.

    Returns:
        bool: True if FIPS-compliant SHA-256 should be used, False for blake3.
    """
    requested = os.environ.get("VLLM_USE_FIPS_HASHING", "0").lower()

    # An explicit opt-in takes precedence over blake3 availability.
    if requested in ("1", "true", "yes"):
        logger.info("FIPS-compliant hashing enabled via VLLM_USE_FIPS_HASHING")
        return True

    if _HAS_BLAKE3:
        return False

    # No opt-in and no blake3: SHA-256 is the only hasher left.
    logger.info("blake3 not available, using FIPS-compliant SHA-256 hashing")
    return True
# Resolved once, when this module is imported; _create_hasher() consults this
# flag rather than re-reading the environment on every call.
_USE_FIPS_HASHING = _use_fips_hashing()
class _Blake3Hasher:
    """Thin adapter exposing blake3 through update() / hexdigest().

    Raises:
        RuntimeError: on construction, if the optional blake3 import failed
            (the module-level ``_blake3`` is None).
    """

    def __init__(self):
        factory = _blake3
        if factory is None:
            raise RuntimeError("blake3 is not available")
        self._hasher = factory()

    def update(self, data: bytes | memoryview) -> None:
        """Feed ``data`` into the running hash."""
        self._hasher.update(data)

    def hexdigest(self) -> str:
        """Return the digest of everything fed so far as a hex string."""
        digest = self._hasher.hexdigest()
        return digest
class _Sha256Hasher:
"""FIPS 140-3 compliant SHA-256 hasher with consistent interface.
This provides the same interface as _Blake3Hasher but uses the
FIPS-approved SHA-256 algorithm from hashlib.
"""
def __init__(self):
self._hasher = hashlib.sha256()
def update(self, data: bytes | memoryview) -> None:
# hashlib requires bytes, not memoryview
if isinstance(data, memoryview):
data = bytes(data)
self._hasher.update(data)
def hexdigest(self) -> str:
return self._hasher.hexdigest()
def _create_hasher() -> _Blake3Hasher | _Sha256Hasher:
    """Instantiate the hasher selected by the module's FIPS flag.

    Returns:
        A fresh _Sha256Hasher when _USE_FIPS_HASHING is truthy, otherwise a
        fresh _Blake3Hasher. Both expose update() and hexdigest().
    """
    hasher_cls = _Sha256Hasher if _USE_FIPS_HASHING else _Blake3Hasher
    return hasher_cls()
class MultiModalHasher:
@classmethod
def serialize_item(cls, obj: object) -> Iterable[bytes | memoryview]:
# Simple cases
if isinstance(obj, (bytes, memoryview)):
if isinstance(obj, bytes | memoryview):
return (obj,)
if isinstance(obj, str):
return (obj.encode("utf-8"),)
if isinstance(obj, (int, float)):
if isinstance(obj, int | float):
return (np.array(obj).tobytes(),)
if isinstance(obj, Image.Image):
@ -99,7 +195,7 @@ class MultiModalHasher:
obj: object,
) -> Iterable[bytes | memoryview]:
# Recursive cases
if isinstance(obj, (list, tuple)):
if isinstance(obj, list | tuple):
for i, elem in enumerate(obj):
yield from cls.iter_item_to_bytes(f"{key}.{i}", elem)
elif isinstance(obj, dict):
@ -111,7 +207,7 @@ class MultiModalHasher:
@classmethod
def hash_kwargs(cls, **kwargs: object) -> str:
hasher = blake3()
hasher = _create_hasher()
for k, v in kwargs.items():
for bytes_ in cls.iter_item_to_bytes(k, v):