[Misc] Warn about v0 in benchmark_paged_attn.py (#15495)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
2025-12-22 10:45:54 +08:00 · 2025-03-25 23:31:04 -04:00 · 2025-03-25 23:31:04 -04:00 · 23114d3364
commit 23114d3364
parent 997c8811d6
1 changed file with 6 additions and 0 deletions
--- a/benchmarks/kernels/benchmark_paged_attention.py
+++ b/benchmarks/kernels/benchmark_paged_attention.py
@@ -7,10 +7,13 @@ from typing import Optional
 import torch
 from vllm import _custom_ops as ops
+from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser,
                        create_kv_caches_with_random)
+logger = init_logger(__name__)
 NUM_BLOCKS = 128 * 1024
 PARTITION_SIZE = 512
 PARTITION_SIZE_ROCM = 256
@@ -193,6 +196,9 @@ def main(
 if __name__ == '__main__':
+    logger.warning("This script benchmarks the paged attention kernel. "
+                   "By default this is no longer used in vLLM inference.")
    parser = FlexibleArgumentParser(
        description="Benchmark the paged attention kernel.")
    parser.add_argument("--version",