From 23114d33640175229a395b9ed1128c3a41ad65d9 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith
Date: Tue, 25 Mar 2025 23:31:04 -0400
Subject: [PATCH] [Misc] Warn about v0 in benchmark_paged_attn.py (#15495)

Signed-off-by: Tyler Michael Smith
---
 benchmarks/kernels/benchmark_paged_attention.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/benchmarks/kernels/benchmark_paged_attention.py b/benchmarks/kernels/benchmark_paged_attention.py
index 48b351bc48141..2625239b08ef2 100644
--- a/benchmarks/kernels/benchmark_paged_attention.py
+++ b/benchmarks/kernels/benchmark_paged_attention.py
@@ -7,10 +7,13 @@ from typing import Optional
 import torch
 
 from vllm import _custom_ops as ops
+from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser,
                         create_kv_caches_with_random)
 
+logger = init_logger(__name__)
+
 NUM_BLOCKS = 128 * 1024
 PARTITION_SIZE = 512
 PARTITION_SIZE_ROCM = 256
@@ -193,6 +196,9 @@ def main(
 
 
 if __name__ == '__main__':
+    logger.warning("This script benchmarks the paged attention kernel. "
+                   "By default this is no longer used in vLLM inference.")
+
     parser = FlexibleArgumentParser(
         description="Benchmark the paged attention kernel.")
     parser.add_argument("--version",