mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-17 11:27:11 +08:00
Merge b9590323e284b13fe9c2a9e69f7cfb5b483f089e into 254f6b986720c92ddf97fbb1a6a6465da8e87e29
This commit is contained in:
commit
be87c0fed2
@ -93,3 +93,120 @@ def test_hash_image_exif_id():
|
||||
assert hasher.hash_kwargs(image=image1) == hasher.hash_kwargs(image=id.bytes)
|
||||
# second image has non-UUID in ImageID, so it should hash to the image data
|
||||
assert hasher.hash_kwargs(image=image2) == hasher.hash_kwargs(image=image2a)
|
||||
|
||||
|
||||
# Tests for FIPS 140-3 compliant hashing support
|
||||
class TestFIPSHashing:
    """Tests for the SHA-256 (FIPS) and blake3 hashing backends.

    The backend is chosen by the module-level ``_USE_FIPS_HASHING`` flag in
    ``vllm.multimodal.hasher``, which ``_create_hasher()`` reads on each call.
    Tests that claim to cover FIPS mode therefore force that flag with
    ``monkeypatch`` instead of relying on how the test environment happens to
    be configured.
    """

    # Dotted path of the flag that _create_hasher() consults.
    _FIPS_FLAG = "vllm.multimodal.hasher._USE_FIPS_HASHING"

    def test_sha256_hasher_basic(self):
        """_Sha256Hasher must agree with hashlib's SHA-256."""
        import hashlib

        from vllm.multimodal.hasher import _Sha256Hasher

        hasher = _Sha256Hasher()
        hasher.update(b"test data")
        result = hasher.hexdigest()

        # SHA-256 produces a 64-character lowercase hex digest
        assert len(result) == 64
        assert all(c in "0123456789abcdef" for c in result)
        # Shape checks alone would pass for any 256-bit hash; pin the value.
        assert result == hashlib.sha256(b"test data").hexdigest()

    def test_sha256_hasher_memoryview(self):
        """bytes and memoryview inputs must hash identically."""
        import hashlib

        from vllm.multimodal.hasher import _Sha256Hasher

        data = b"test data"

        from_bytes = _Sha256Hasher()
        from_bytes.update(data)

        from_view = _Sha256Hasher()
        from_view.update(memoryview(data))

        assert from_bytes.hexdigest() == from_view.hexdigest()

        # A strided (non-contiguous) view must hash as the bytes it exposes.
        strided = memoryview(data)[::2]
        from_strided = _Sha256Hasher()
        from_strided.update(strided)
        assert from_strided.hexdigest() == hashlib.sha256(data[::2]).hexdigest()

    def test_blake3_hasher_basic(self):
        """_Blake3Hasher produces a well-formed digest when blake3 exists."""
        from vllm.multimodal.hasher import _HAS_BLAKE3, _Blake3Hasher

        if not _HAS_BLAKE3:
            pytest.skip("blake3 not available")

        hasher = _Blake3Hasher()
        hasher.update(b"test data")
        result = hasher.hexdigest()

        # blake3 also produces 64-character hex digest by default
        assert len(result) == 64
        assert all(c in "0123456789abcdef" for c in result)

    def test_blake3_and_sha256_produce_different_hashes(self):
        """The two backends must not collide on the same input."""
        from vllm.multimodal.hasher import _HAS_BLAKE3, _Blake3Hasher, _Sha256Hasher

        if not _HAS_BLAKE3:
            pytest.skip("blake3 not available")

        data = b"test data for hashing"

        blake3_hasher = _Blake3Hasher()
        blake3_hasher.update(data)

        sha256_hasher = _Sha256Hasher()
        sha256_hasher.update(data)

        # Different algorithms should produce different hashes
        assert blake3_hasher.hexdigest() != sha256_hasher.hexdigest()

    def test_create_hasher_returns_correct_type(self, monkeypatch):
        """_create_hasher follows the FIPS flag in both directions."""
        from vllm.multimodal.hasher import (
            _HAS_BLAKE3,
            _USE_FIPS_HASHING,
            _Blake3Hasher,
            _create_hasher,
            _Sha256Hasher,
        )

        # Default configuration of the current environment.
        hasher = _create_hasher()
        if _USE_FIPS_HASHING:
            assert isinstance(hasher, _Sha256Hasher)
        else:
            assert isinstance(hasher, _Blake3Hasher)

        # Forced FIPS mode always selects SHA-256.
        monkeypatch.setattr(self._FIPS_FLAG, True)
        assert isinstance(_create_hasher(), _Sha256Hasher)

        # Forced non-FIPS mode selects blake3 (only constructible if installed).
        if _HAS_BLAKE3:
            monkeypatch.setattr(self._FIPS_FLAG, False)
            assert isinstance(_create_hasher(), _Blake3Hasher)

    def test_hash_kwargs_consistency_with_fips(self, monkeypatch):
        """hash_kwargs is deterministic with FIPS hashing enabled."""
        monkeypatch.setattr(self._FIPS_FLAG, True)
        data = {"key1": "value1", "key2": 42, "key3": b"bytes"}

        hash1 = MultiModalHasher.hash_kwargs(**data)
        hash2 = MultiModalHasher.hash_kwargs(**data)

        assert hash1 == hash2

    def test_hash_kwargs_with_image_fips(self, monkeypatch):
        """Image hashing works with FIPS hashing enabled."""
        monkeypatch.setattr(self._FIPS_FLAG, True)
        image = Image.new("RGB", size=(10, 10), color=(255, 0, 0))

        # Should not raise an exception
        result = MultiModalHasher.hash_kwargs(image=image)
        assert isinstance(result, str)
        assert len(result) == 64

    def test_hash_kwargs_with_tensor_fips(self, monkeypatch):
        """Tensor hashing works with FIPS hashing enabled."""
        monkeypatch.setattr(self._FIPS_FLAG, True)
        tensor = torch.zeros((5, 10, 20), dtype=torch.float32)

        # Should not raise an exception
        result = MultiModalHasher.hash_kwargs(data=tensor)
        assert isinstance(result, str)
        assert len(result) == 64

    def test_hash_kwargs_with_numpy_array_fips(self, monkeypatch):
        """NumPy array hashing works with FIPS hashing enabled."""
        monkeypatch.setattr(self._FIPS_FLAG, True)
        arr = np.zeros((5, 10, 20))

        # Should not raise an exception
        result = MultiModalHasher.hash_kwargs(data=arr)
        assert isinstance(result, str)
        assert len(result) == 64
|
||||
|
||||
@ -1,13 +1,29 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Multimodal content hashing utilities.
|
||||
|
||||
This module provides hashing functionality for multimodal content (images,
|
||||
tensors, etc.) used in cache key generation. It supports both high-performance
|
||||
blake3 hashing and FIPS 140-3 compliant SHA-256 hashing.
|
||||
|
||||
FIPS Compliance:
|
||||
blake3 is not FIPS 140-3 approved. For environments requiring FIPS
|
||||
compliance (government, healthcare, finance), set the environment
|
||||
variable VLLM_USE_FIPS_HASHING=1 to use SHA-256 instead.
|
||||
|
||||
Environment Variables:
|
||||
VLLM_USE_FIPS_HASHING: Set to "1", "true", or "yes" to enable
|
||||
FIPS-compliant SHA-256 hashing instead of blake3.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import pickle
|
||||
import uuid
|
||||
from collections.abc import Iterable
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from blake3 import blake3
|
||||
from PIL import Image
|
||||
|
||||
from vllm.logger import init_logger
|
||||
@ -16,16 +32,96 @@ from .base import MediaWithBytes
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# blake3 is optional - not FIPS 140-3 approved
|
||||
# In FIPS-constrained environments, blake3 may not be available or allowed
|
||||
try:
|
||||
from blake3 import blake3 as _blake3
|
||||
|
||||
_HAS_BLAKE3 = True
|
||||
except ImportError:
|
||||
_blake3 = None
|
||||
_HAS_BLAKE3 = False
|
||||
|
||||
|
||||
def _use_fips_hashing() -> bool:
    """Decide whether SHA-256 should be used in place of blake3.

    SHA-256 is selected when either of the following holds:

    - ``VLLM_USE_FIPS_HASHING`` is set to ``"1"``, ``"true"`` or ``"yes"``
      (compared case-insensitively); or
    - the optional ``blake3`` import failed, in which case SHA-256 is the
      only backend available.

    The reason for choosing SHA-256 is logged at INFO level.

    Returns:
        True if SHA-256 should be used, False if blake3 should be used.
    """
    requested = os.environ.get("VLLM_USE_FIPS_HASHING", "0").lower()

    # An explicit opt-in takes precedence over blake3 availability.
    if requested in ("1", "true", "yes"):
        logger.info("FIPS-compliant hashing enabled via VLLM_USE_FIPS_HASHING")
        return True

    if _HAS_BLAKE3:
        return False

    # No opt-in, but blake3 could not be imported: fall back automatically.
    logger.info("blake3 not available, using FIPS-compliant SHA-256 hashing")
    return True
|
||||
|
||||
|
||||
# Resolved once when this module is imported; the environment variable and
# blake3 availability are not re-checked afterwards. _create_hasher() reads
# this constant on every call to pick the backend.
_USE_FIPS_HASHING = _use_fips_hashing()
|
||||
|
||||
|
||||
class _Blake3Hasher:
    """Adapter giving blake3 the ``update()`` / ``hexdigest()`` interface.

    Raises:
        RuntimeError: On construction, if blake3 could not be imported.
    """

    def __init__(self):
        # _blake3 is None when the optional import at module load failed.
        backend = _blake3
        if backend is None:
            raise RuntimeError("blake3 is not available")
        self._hasher = backend()

    def update(self, data: bytes | memoryview) -> None:
        """Feed ``data`` into the running digest."""
        inner = self._hasher
        inner.update(data)

    def hexdigest(self) -> str:
        """Return the digest of everything fed so far as a hex string."""
        inner = self._hasher
        return inner.hexdigest()
|
||||
|
||||
|
||||
class _Sha256Hasher:
|
||||
"""FIPS 140-3 compliant SHA-256 hasher with consistent interface.
|
||||
|
||||
This provides the same interface as _Blake3Hasher but uses the
|
||||
FIPS-approved SHA-256 algorithm from hashlib.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._hasher = hashlib.sha256()
|
||||
|
||||
def update(self, data: bytes | memoryview) -> None:
|
||||
# hashlib requires bytes, not memoryview
|
||||
if isinstance(data, memoryview):
|
||||
data = bytes(data)
|
||||
self._hasher.update(data)
|
||||
|
||||
def hexdigest(self) -> str:
|
||||
return self._hasher.hexdigest()
|
||||
|
||||
|
||||
def _create_hasher() -> _Blake3Hasher | _Sha256Hasher:
    """Instantiate the hasher selected by ``_USE_FIPS_HASHING``.

    Returns:
        A fresh ``_Sha256Hasher`` when ``_USE_FIPS_HASHING`` is true,
        otherwise a fresh ``_Blake3Hasher``. Both expose ``update()`` and
        ``hexdigest()``.
    """
    hasher_cls = _Sha256Hasher if _USE_FIPS_HASHING else _Blake3Hasher
    return hasher_cls()
|
||||
|
||||
|
||||
class MultiModalHasher:
|
||||
@classmethod
|
||||
def serialize_item(cls, obj: object) -> Iterable[bytes | memoryview]:
|
||||
# Simple cases
|
||||
if isinstance(obj, (bytes, memoryview)):
|
||||
if isinstance(obj, bytes | memoryview):
|
||||
return (obj,)
|
||||
if isinstance(obj, str):
|
||||
return (obj.encode("utf-8"),)
|
||||
if isinstance(obj, (int, float)):
|
||||
if isinstance(obj, int | float):
|
||||
return (np.array(obj).tobytes(),)
|
||||
|
||||
if isinstance(obj, Image.Image):
|
||||
@ -99,7 +195,7 @@ class MultiModalHasher:
|
||||
obj: object,
|
||||
) -> Iterable[bytes | memoryview]:
|
||||
# Recursive cases
|
||||
if isinstance(obj, (list, tuple)):
|
||||
if isinstance(obj, list | tuple):
|
||||
for i, elem in enumerate(obj):
|
||||
yield from cls.iter_item_to_bytes(f"{key}.{i}", elem)
|
||||
elif isinstance(obj, dict):
|
||||
@ -111,7 +207,7 @@ class MultiModalHasher:
|
||||
|
||||
@classmethod
|
||||
def hash_kwargs(cls, **kwargs: object) -> str:
|
||||
hasher = blake3()
|
||||
hasher = _create_hasher()
|
||||
|
||||
for k, v in kwargs.items():
|
||||
for bytes_ in cls.iter_item_to_bytes(k, v):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user