From b9590323e284b13fe9c2a9e69f7cfb5b483f089e Mon Sep 17 00:00:00 2001
From: yurekami <249254018+yurekami@users.noreply.github.com>
Date: Thu, 18 Dec 2025 14:06:21 +0900
Subject: [PATCH] [Multimodal] Add FIPS 140-3 compliant hashing support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds FIPS 140-3 compliant SHA-256 hashing as an alternative to
blake3 for multimodal content hashing. This enables vLLM usage in
government, healthcare, and financial environments that require
FIPS-approved cryptographic algorithms.

Changes:
- Add _Sha256Hasher wrapper class with consistent interface
- Add _Blake3Hasher wrapper class for optional blake3 usage
- Add _create_hasher() factory function for hasher selection
- Add VLLM_USE_FIPS_HASHING environment variable control
- Automatic fallback to SHA-256 when blake3 is unavailable
- Add comprehensive test suite for FIPS hashing functionality

Fixes #18334

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
Signed-off-by: yurekami <249254018+yurekami@users.noreply.github.com>
---
 tests/multimodal/test_hasher.py | 117 ++++++++++++++++++++++++++++++++
 vllm/multimodal/hasher.py       | 106 +++++++++++++++++++++++++++--
 2 files changed, 218 insertions(+), 5 deletions(-)

diff --git a/tests/multimodal/test_hasher.py b/tests/multimodal/test_hasher.py
index 29064f2737834..ba1534e088974 100644
--- a/tests/multimodal/test_hasher.py
+++ b/tests/multimodal/test_hasher.py
@@ -93,3 +93,120 @@ def test_hash_image_exif_id():
     assert hasher.hash_kwargs(image=image1) == hasher.hash_kwargs(image=id.bytes)
     # second image has non-UUID in ImageID, so it should hash to the image data
     assert hasher.hash_kwargs(image=image2) == hasher.hash_kwargs(image=image2a)
+
+
+# Tests for FIPS 140-3 compliant hashing support
+class TestFIPSHashing:
+    """Test FIPS-compliant SHA-256 hashing functionality."""
+
+    def test_sha256_hasher_basic(self):
+        """Test that _Sha256Hasher produces valid hashes."""
+        from vllm.multimodal.hasher import _Sha256Hasher
+
+        hasher = _Sha256Hasher()
+        hasher.update(b"test data")
+        result = hasher.hexdigest()
+
+        # SHA-256 produces 64-character hex digest
+        assert len(result) == 64
+        assert all(c in "0123456789abcdef" for c in result)
+
+    def test_sha256_hasher_memoryview(self):
+        """Test that _Sha256Hasher handles memoryview correctly."""
+        from vllm.multimodal.hasher import _Sha256Hasher
+
+        data = b"test data"
+        mv = memoryview(data)
+
+        hasher1 = _Sha256Hasher()
+        hasher1.update(data)
+
+        hasher2 = _Sha256Hasher()
+        hasher2.update(mv)
+
+        assert hasher1.hexdigest() == hasher2.hexdigest()
+
+    def test_blake3_hasher_basic(self):
+        """Test that _Blake3Hasher produces valid hashes when available."""
+        from vllm.multimodal.hasher import _HAS_BLAKE3, _Blake3Hasher
+
+        if not _HAS_BLAKE3:
+            pytest.skip("blake3 not available")
+
+        hasher = _Blake3Hasher()
+        hasher.update(b"test data")
+        result = hasher.hexdigest()
+
+        # blake3 also produces 64-character hex digest by default
+        assert len(result) == 64
+        assert all(c in "0123456789abcdef" for c in result)
+
+    def test_blake3_and_sha256_produce_different_hashes(self):
+        """Test that blake3 and SHA-256 produce different hashes for same input."""
+        from vllm.multimodal.hasher import _HAS_BLAKE3, _Blake3Hasher, _Sha256Hasher
+
+        if not _HAS_BLAKE3:
+            pytest.skip("blake3 not available")
+
+        data = b"test data for hashing"
+
+        blake3_hasher = _Blake3Hasher()
+        blake3_hasher.update(data)
+
+        sha256_hasher = _Sha256Hasher()
+        sha256_hasher.update(data)
+
+        # Different algorithms should produce different hashes
+        assert blake3_hasher.hexdigest() != sha256_hasher.hexdigest()
+
+    def test_create_hasher_returns_correct_type(self):
+        """Test that _create_hasher returns appropriate hasher type."""
+        from vllm.multimodal.hasher import (
+            _USE_FIPS_HASHING,
+            _Blake3Hasher,
+            _create_hasher,
+            _Sha256Hasher,
+        )
+
+        hasher = _create_hasher()
+
+        if _USE_FIPS_HASHING:
+            assert isinstance(hasher, _Sha256Hasher)
+        else:
+            assert isinstance(hasher, _Blake3Hasher)
+
+    def test_hash_kwargs_consistency_with_fips(self):
+        """Test that hash_kwargs produces consistent results."""
+        data = {"key1": "value1", "key2": 42, "key3": b"bytes"}
+
+        hash1 = MultiModalHasher.hash_kwargs(**data)
+        hash2 = MultiModalHasher.hash_kwargs(**data)
+
+        assert hash1 == hash2
+
+    def test_hash_kwargs_with_image_fips(self):
+        """Test that image hashing works in FIPS mode."""
+        image = Image.new("RGB", size=(10, 10), color=(255, 0, 0))
+
+        # Should not raise an exception
+        result = MultiModalHasher.hash_kwargs(image=image)
+        assert isinstance(result, str)
+        assert len(result) == 64
+
+    def test_hash_kwargs_with_tensor_fips(self):
+        """Test that tensor hashing works in FIPS mode."""
+        tensor = torch.zeros((5, 10, 20), dtype=torch.float32)
+
+        # Should not raise an exception
+        result = MultiModalHasher.hash_kwargs(data=tensor)
+        assert isinstance(result, str)
+        assert len(result) == 64
+
+    def test_hash_kwargs_with_numpy_array_fips(self):
+        """Test that numpy array hashing works in FIPS mode."""
+        arr = np.zeros((5, 10, 20))
+
+        # Should not raise an exception
+        result = MultiModalHasher.hash_kwargs(data=arr)
+        assert isinstance(result, str)
+        assert len(result) == 64
diff --git a/vllm/multimodal/hasher.py b/vllm/multimodal/hasher.py
index cc50322fed902..26cf36d1550be 100644
--- a/vllm/multimodal/hasher.py
+++ b/vllm/multimodal/hasher.py
@@ -1,13 +1,29 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Multimodal content hashing utilities.
 
+This module provides hashing functionality for multimodal content (images,
+tensors, etc.) used in cache key generation. It supports both high-performance
+blake3 hashing and FIPS 140-3 compliant SHA-256 hashing.
+
+FIPS Compliance:
+    blake3 is not FIPS 140-3 approved. For environments requiring FIPS
+    compliance (government, healthcare, finance), set the environment
+    variable VLLM_USE_FIPS_HASHING=1 to use SHA-256 instead.
+
+Environment Variables:
+    VLLM_USE_FIPS_HASHING: Set to "1", "true", or "yes" to enable
+        FIPS-compliant SHA-256 hashing instead of blake3.
+"""
+
+import hashlib
+import os
 import pickle
 import uuid
 from collections.abc import Iterable
 
 import numpy as np
 import torch
-from blake3 import blake3
 from PIL import Image
 
 from vllm.logger import init_logger
@@ -16,16 +32,96 @@ from .base import MediaWithBytes
 
 logger = init_logger(__name__)
 
+# blake3 is optional - not FIPS 140-3 approved
+# In FIPS-constrained environments, blake3 may not be available or allowed
+try:
+    from blake3 import blake3 as _blake3
+
+    _HAS_BLAKE3 = True
+except ImportError:
+    _blake3 = None
+    _HAS_BLAKE3 = False
+
+
+def _use_fips_hashing() -> bool:
+    """Determine whether to use FIPS-compliant hashing.
+
+    Returns True if:
+    - VLLM_USE_FIPS_HASHING environment variable is set to a truthy value
+    - blake3 is not available (automatic fallback)
+
+    Returns:
+        bool: True if FIPS-compliant SHA-256 should be used, False for blake3.
+    """
+    fips_env = os.environ.get("VLLM_USE_FIPS_HASHING", "0")
+    use_fips = fips_env.lower() in ("1", "true", "yes")
+
+    if use_fips:
+        logger.info("FIPS-compliant hashing enabled via VLLM_USE_FIPS_HASHING")
+    elif not _HAS_BLAKE3:
+        logger.info("blake3 not available, using FIPS-compliant SHA-256 hashing")
+
+    return use_fips or not _HAS_BLAKE3
+
+
+_USE_FIPS_HASHING = _use_fips_hashing()
+
+
+class _Blake3Hasher:
+    """Wrapper for blake3 hasher with consistent interface."""
+
+    def __init__(self):
+        if _blake3 is None:
+            raise RuntimeError("blake3 is not available")
+        self._hasher = _blake3()
+
+    def update(self, data: bytes | memoryview) -> None:
+        self._hasher.update(data)
+
+    def hexdigest(self) -> str:
+        return self._hasher.hexdigest()
+
+
+class _Sha256Hasher:
+    """FIPS 140-3 compliant SHA-256 hasher with consistent interface.
+
+    This provides the same interface as _Blake3Hasher but uses the
+    FIPS-approved SHA-256 algorithm from hashlib.
+    """
+
+    def __init__(self):
+        self._hasher = hashlib.sha256()
+
+    def update(self, data: bytes | memoryview) -> None:
+        # hashlib accepts any C-contiguous buffer; copy only strided views
+        if isinstance(data, memoryview) and not data.c_contiguous:
+            data = bytes(data)
+        self._hasher.update(data)
+
+    def hexdigest(self) -> str:
+        return self._hasher.hexdigest()
+
+
+def _create_hasher() -> _Blake3Hasher | _Sha256Hasher:
+    """Create the appropriate hasher based on FIPS configuration.
+
+    Returns:
+        A hasher instance with update() and hexdigest() methods.
+    """
+    if _USE_FIPS_HASHING:
+        return _Sha256Hasher()
+    return _Blake3Hasher()
+
 
 class MultiModalHasher:
     @classmethod
     def serialize_item(cls, obj: object) -> Iterable[bytes | memoryview]:
         # Simple cases
-        if isinstance(obj, (bytes, memoryview)):
+        if isinstance(obj, bytes | memoryview):
             return (obj,)
         if isinstance(obj, str):
             return (obj.encode("utf-8"),)
-        if isinstance(obj, (int, float)):
+        if isinstance(obj, int | float):
             return (np.array(obj).tobytes(),)
 
         if isinstance(obj, Image.Image):
@@ -99,7 +195,7 @@ class MultiModalHasher:
         obj: object,
     ) -> Iterable[bytes | memoryview]:
         # Recursive cases
-        if isinstance(obj, (list, tuple)):
+        if isinstance(obj, list | tuple):
             for i, elem in enumerate(obj):
                 yield from cls.iter_item_to_bytes(f"{key}.{i}", elem)
         elif isinstance(obj, dict):
@@ -111,7 +207,7 @@ class MultiModalHasher:
 
     @classmethod
     def hash_kwargs(cls, **kwargs: object) -> str:
-        hasher = blake3()
+        hasher = _create_hasher()
 
         for k, v in kwargs.items():
             for bytes_ in cls.iter_item_to_bytes(k, v):