mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-22 10:45:54 +08:00
[Misc] Warn about v0 in benchmark_paged_attn.py (#15495)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
parent
997c8811d6
commit
23114d3364
@ -7,10 +7,13 @@ from typing import Optional
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from vllm import _custom_ops as ops
|
from vllm import _custom_ops as ops
|
||||||
|
from vllm.logger import init_logger
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser,
|
from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser,
|
||||||
create_kv_caches_with_random)
|
create_kv_caches_with_random)
|
||||||
|
|
||||||
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
NUM_BLOCKS = 128 * 1024
|
NUM_BLOCKS = 128 * 1024
|
||||||
PARTITION_SIZE = 512
|
PARTITION_SIZE = 512
|
||||||
PARTITION_SIZE_ROCM = 256
|
PARTITION_SIZE_ROCM = 256
|
||||||
@ -193,6 +196,9 @@ def main(
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
logger.warning("This script benchmarks the paged attention kernel. "
|
||||||
|
"By default this is no longer used in vLLM inference.")
|
||||||
|
|
||||||
parser = FlexibleArgumentParser(
|
parser = FlexibleArgumentParser(
|
||||||
description="Benchmark the paged attention kernel.")
|
description="Benchmark the paged attention kernel.")
|
||||||
parser.add_argument("--version",
|
parser.add_argument("--version",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user