# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Micro benchmark comparing built-in hash(), SHA-256, and xxHash.

This focuses on a single test payload shaped like the prefix-cache hash input:
    (32-byte bytes object, 32-int tuple)

Usage:
    python benchmarks/hash_micro_benchmark.py --iterations 20000
"""

from __future__ import annotations

import argparse
import random
import statistics
import time
from collections.abc import Callable, Iterable

from vllm.utils.hashing import sha256, xxhash
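# As the "(pickle)" labels below indicate, sha256 and xxhash serialize the
# payload with pickle before hashing, so serialization cost is included in
# their measured times.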


def _generate_test_data(seed: int) -> tuple[bytes, tuple[int, ...]]:
    """Generate a deterministic test payload."""
    random.seed(seed)
    bytes_data = bytes(random.getrandbits(8) for _ in range(32))
    int_tuple = tuple(random.randint(1, 1_000_000) for _ in range(32))
    return (bytes_data, int_tuple)


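# Each call is timed individually with time.perf_counter(), so timer overhead
# is included in every sample; treat absolute numbers as approximate and read
# the relative comparison as the meaningful signal.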
def _benchmark_func(func: Callable[[tuple], object], data: tuple, iterations: int):
    """Return (avg_seconds, std_seconds) for hashing `data` `iterations` times."""
    times: list[float] = []

    # Warm-up to avoid first-run noise.
    for _ in range(200):
        func(data)

    for _ in range(iterations):
        start = time.perf_counter()
        func(data)
        end = time.perf_counter()
        times.append(end - start)

    avg = statistics.mean(times)
    std = statistics.stdev(times) if len(times) > 1 else 0.0
    return avg, std


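# xxHash is typically an optional dependency; if the xxhash package is not
# installed, calling the hasher is expected to raise ModuleNotFoundError,
# which is why unavailable benchmarks are skipped instead of failing the run.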
def _run_benchmarks(
    benchmarks: Iterable[tuple[str, Callable[[tuple], object]]],
    data: tuple,
    iterations: int,
):
    """Yield (name, avg, std) for each benchmark, skipping unavailable ones."""
    for name, func in benchmarks:
        try:
            avg, std = _benchmark_func(func, data, iterations)
        except ModuleNotFoundError as exc:
            print(f"Skipping {name}: {exc}")
            continue
        yield name, avg, std


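# Built-in hash() is a speed baseline only: Python salts hashes of str/bytes
# per process (see PYTHONHASHSEED), so it cannot serve as a stable
# cross-process cache key.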
def builtin_hash(data: tuple) -> int:
    """Wrapper for Python's built-in hash()."""
    return hash(data)


def main() -> None:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--iterations",
        type=int,
        default=10_000,
        help="Number of measured iterations per hash function.",
    )
    parser.add_argument(
        "--seed", type=int, default=42, help="Random seed for test payload."
    )
    args = parser.parse_args()

    data = _generate_test_data(args.seed)
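    # (label, callable) pairs; _run_benchmarks skips any entry whose backing
    # library is unavailable.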
    benchmarks = (
        ("SHA256 (pickle)", sha256),
        ("xxHash (pickle)", xxhash),
        ("built-in hash()", builtin_hash),
    )

    print("=" * 60)
    print("HASH FUNCTION MICRO BENCHMARK")
    print("=" * 60)
    print("Test data: (32-byte bytes object, 32-int tuple)")
    print(f"Iterations: {args.iterations:,}")
    print("=" * 60)

    results = list(_run_benchmarks(benchmarks, data, args.iterations))
    builtin_entry = next((r for r in results if r[0] == "built-in hash()"), None)

    print("\nResults:")
    for name, avg, std in results:
        print(f" {name:16s}: {avg * 1e6:8.2f} ± {std * 1e6:6.2f} μs")

    if builtin_entry:
        _, builtin_avg, _ = builtin_entry
        print("\n" + "=" * 60)
        print("SUMMARY (relative to built-in hash())")
        print("=" * 60)
        for name, avg, _ in results:
            if name == "built-in hash()":
                continue
            speed_ratio = avg / builtin_avg
            print(f"• {name} is {speed_ratio:.1f}x slower than built-in hash()")
    else:
        print("\nBuilt-in hash() result missing; cannot compute speed ratios.")


if __name__ == "__main__":
    main()