From 8a297115e2367d463b781adb86b55ac740594cf6 Mon Sep 17 00:00:00 2001 From: dongbo910220 <32610838+dongbo910220@users.noreply.github.com> Date: Sun, 19 Oct 2025 11:09:38 +0800 Subject: [PATCH] [Chore] Separate out hashing utilities from vllm.utils (#27151) Signed-off-by: dongbo910220 <1275604947@qq.com> --- tests/utils_/test_utils.py | 3 +- tests/v1/core/test_kv_cache_utils.py | 2 +- tests/v1/core/test_prefix_caching.py | 2 +- tests/v1/core/utils.py | 2 +- .../unit/test_offloading_connector.py | 2 +- tests/v1/kv_connector/unit/utils.py | 2 +- tools/pre_commit/check_pickle_imports.py | 1 + vllm/utils/__init__.py | 54 +--------------- vllm/utils/hashing.py | 63 +++++++++++++++++++ vllm/v1/core/kv_cache_utils.py | 3 +- vllm/v1/engine/core.py | 2 +- 11 files changed, 74 insertions(+), 62 deletions(-) create mode 100644 vllm/utils/hashing.py diff --git a/tests/utils_/test_utils.py b/tests/utils_/test_utils.py index b3a27460df86c..1328790d4a9d4 100644 --- a/tests/utils_/test_utils.py +++ b/tests/utils_/test_utils.py @@ -29,17 +29,16 @@ from vllm.utils import ( join_host_port, make_zmq_path, make_zmq_socket, - sha256, split_host_port, split_zmq_path, unique_filepath, ) +from vllm.utils.hashing import sha256 from vllm.utils.torch_utils import ( common_broadcastable_dtype, current_stream, is_lossless_cast, ) - from vllm.utils.mem_utils import MemorySnapshot, memory_profiling from ..utils import create_new_process_for_each_test, flat_product diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py index 678ec25464776..6558267c13a31 100644 --- a/tests/v1/core/test_kv_cache_utils.py +++ b/tests/v1/core/test_kv_cache_utils.py @@ -14,7 +14,7 @@ from vllm.multimodal.inputs import ( PlaceholderRange, ) from vllm.sampling_params import SamplingParams -from vllm.utils import sha256, sha256_cbor +from vllm.utils.hashing import sha256, sha256_cbor from vllm.utils.mem_constants import GiB_bytes from vllm.v1.core.kv_cache_manager import KVCacheManager from vllm.v1.core.kv_cache_utils import ( diff --git a/tests/v1/core/test_prefix_caching.py b/tests/v1/core/test_prefix_caching.py index a81644ce252ea..837a513cb75e1 100644 --- a/tests/v1/core/test_prefix_caching.py +++ b/tests/v1/core/test_prefix_caching.py @@ -16,7 +16,7 @@ from vllm.multimodal.inputs import ( PlaceholderRange, ) from vllm.sampling_params import SamplingParams -from vllm.utils import sha256, sha256_cbor +from vllm.utils.hashing import sha256, sha256_cbor from vllm.v1.core.block_pool import BlockHashToBlockMap, BlockPool from vllm.v1.core.kv_cache_manager import KVCacheManager, Request from vllm.v1.core.kv_cache_utils import ( diff --git a/tests/v1/core/utils.py b/tests/v1/core/utils.py index c7df43359381b..6e739d6b0e77a 100644 --- a/tests/v1/core/utils.py +++ b/tests/v1/core/utils.py @@ -17,7 +17,7 @@ from vllm.multimodal.inputs import ( PlaceholderRange, ) from vllm.sampling_params import SamplingParams -from vllm.utils import sha256 +from vllm.utils.hashing import sha256 from vllm.v1.core.kv_cache_utils import get_request_block_hasher, init_none_hash from vllm.v1.core.sched.async_scheduler import AsyncScheduler from vllm.v1.core.sched.scheduler import Scheduler diff --git a/tests/v1/kv_connector/unit/test_offloading_connector.py b/tests/v1/kv_connector/unit/test_offloading_connector.py index 46a5c097094eb..23b6c4802d106 100644 --- a/tests/v1/kv_connector/unit/test_offloading_connector.py +++ b/tests/v1/kv_connector/unit/test_offloading_connector.py @@ -18,7 +18,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.offloading_connector import ( OffloadingConnectorMetadata, ) from vllm.forward_context import ForwardContext -from vllm.utils import sha256 +from vllm.utils.hashing import sha256 from vllm.v1.core.kv_cache_utils import ( BlockHash, get_request_block_hasher, diff --git a/tests/v1/kv_connector/unit/utils.py b/tests/v1/kv_connector/unit/utils.py index e7f505d55e7a4..e3f30bd7698f8 100644 --- a/tests/v1/kv_connector/unit/utils.py +++ b/tests/v1/kv_connector/unit/utils.py @@ -21,7 +21,7 @@ from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory from vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector import ( # noqa SharedStorageConnector, ) -from vllm.utils import sha256 +from vllm.utils.hashing import sha256 from vllm.v1.core.kv_cache_manager import KVCacheBlocks from vllm.v1.core.kv_cache_utils import get_request_block_hasher, init_none_hash from vllm.v1.core.sched.scheduler import Scheduler diff --git a/tools/pre_commit/check_pickle_imports.py b/tools/pre_commit/check_pickle_imports.py index 7944b7c9b275c..6a5ac40d64857 100644 --- a/tools/pre_commit/check_pickle_imports.py +++ b/tools/pre_commit/check_pickle_imports.py @@ -43,6 +43,7 @@ ALLOWED_FILES = { "tests/utils.py", # pickle and cloudpickle "vllm/utils/__init__.py", + "vllm/utils/hashing.py", } PICKLE_RE = re.compile( diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index 33690d86da045..94559ab1c46d9 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -5,14 +5,12 @@ import contextlib import datetime import enum import getpass -import hashlib import importlib import inspect import ipaddress import json import multiprocessing import os -import pickle import signal import socket import subprocess @@ -45,7 +43,6 @@ from typing import TYPE_CHECKING, Any, TextIO, TypeVar from urllib.parse import urlparse from uuid import uuid4 -import cbor2 import cloudpickle import psutil import regex as re @@ -1463,56 +1460,7 @@ def check_use_alibi(model_config: ModelConfig) -> bool: ) -def sha256(input: Any) -> bytes: - """Hash any picklable Python object using SHA-256. - - The input is serialized using pickle before hashing, which allows - arbitrary Python objects to be used. Note that this function does - not use a hash seed—if you need one, prepend it explicitly to the input. - - Args: - input: Any picklable Python object. - - Returns: - Bytes representing the SHA-256 hash of the serialized input. - """ - input_bytes = pickle.dumps(input, protocol=pickle.HIGHEST_PROTOCOL) - return hashlib.sha256(input_bytes).digest() - - -def sha256_cbor(input: Any) -> bytes: - """ - Hash objects using CBOR serialization and SHA-256. - - This option is useful for non-Python-dependent serialization and hashing. - - Args: - input: Object to be serialized and hashed. Supported types include - basic Python types and complex structures like lists, tuples, and - dictionaries. - Custom classes must implement CBOR serialization methods. - - Returns: - Bytes representing the SHA-256 hash of the CBOR serialized input. - """ - input_bytes = cbor2.dumps(input, canonical=True) - return hashlib.sha256(input_bytes).digest() - - -def get_hash_fn_by_name(hash_fn_name: str) -> Callable[[Any], bytes]: - """Get a hash function by name, or raise an error if - the function is not found. - Args: - hash_fn_name: Name of the hash function. - Returns: - A hash function. - """ - if hash_fn_name == "sha256": - return sha256 - if hash_fn_name == "sha256_cbor": - return sha256_cbor - - raise ValueError(f"Unsupported hash function: {hash_fn_name}") +## moved to vllm.utils.hashing @cache diff --git a/vllm/utils/hashing.py b/vllm/utils/hashing.py new file mode 100644 index 0000000000000..49f4f13d115f3 --- /dev/null +++ b/vllm/utils/hashing.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from __future__ import annotations + +import hashlib +import pickle +from collections.abc import Callable +from typing import Any + +import cbor2 + + +def sha256(input: Any) -> bytes: + """Hash any picklable Python object using SHA-256. + + The input is serialized using pickle before hashing, which allows + arbitrary Python objects to be used. Note that this function does + not use a hash seed—if you need one, prepend it explicitly to the input. + + Args: + input: Any picklable Python object. + + Returns: + Bytes representing the SHA-256 hash of the serialized input. + """ + input_bytes = pickle.dumps(input, protocol=pickle.HIGHEST_PROTOCOL) + return hashlib.sha256(input_bytes).digest() + + +def sha256_cbor(input: Any) -> bytes: + """Hash objects using CBOR serialization and SHA-256. + + This option is useful for non-Python-dependent serialization and hashing. + + Args: + input: Object to be serialized and hashed. Supported types include + basic Python types and complex structures like lists, tuples, and + dictionaries. + Custom classes must implement CBOR serialization methods. + + Returns: + Bytes representing the SHA-256 hash of the CBOR serialized input. + """ + input_bytes = cbor2.dumps(input, canonical=True) + return hashlib.sha256(input_bytes).digest() + + +def get_hash_fn_by_name(hash_fn_name: str) -> Callable[[Any], bytes]: + """Get a hash function by name, or raise an error if the function is not found. + + Args: + hash_fn_name: Name of the hash function. + + Returns: + A hash function. + """ + if hash_fn_name == "sha256": + return sha256 + if hash_fn_name == "sha256_cbor": + return sha256_cbor + + raise ValueError(f"Unsupported hash function: {hash_fn_name}") diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index 01b88d1de8d8c..6870e7ebde375 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -12,7 +12,8 @@ from typing import Any, NewType, TypeAlias from vllm import envs from vllm.config import VllmConfig from vllm.logger import init_logger -from vllm.utils import cdiv, sha256_cbor +from vllm.utils import cdiv +from vllm.utils.hashing import sha256_cbor from vllm.utils.mem_constants import GiB_bytes from vllm.v1.kv_cache_interface import ( ChunkedLocalAttentionSpec, diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 2773dc61ff3d7..eb18fb5c43202 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -30,11 +30,11 @@ from vllm.tasks import POOLING_TASKS, SupportedTask from vllm.transformers_utils.config import maybe_register_config_serialize_by_value from vllm.utils import ( decorate_logs, - get_hash_fn_by_name, make_zmq_socket, set_process_title, ) from vllm.utils.gc_utils import maybe_attach_gc_debug_callback +from vllm.utils.hashing import get_hash_fn_by_name from vllm.utils.import_utils import resolve_obj_by_qualname from vllm.v1.core.kv_cache_utils import ( BlockHash,