Add docs for PrefixRepetitionDataset + enable usage with vllm bench throughput (#23012)

Signed-off-by: Seiji Eicher <seiji@anyscale.com>
Co-authored-by: Roger Wang <hey@rogerw.me>
This commit is contained in:
Seiji Eicher 2025-08-16 03:21:20 -07:00 committed by GitHub
parent 2dbccce8a6
commit de9cb61763
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 73 additions and 6 deletions

View File

@@ -40,7 +40,7 @@ become available.
<td><code>wget https://github.com/HPMLL/BurstGPT/releases/download/v1.1/BurstGPT_without_fails_2.csv</code></td>
</tr>
<tr>
<td><strong>Sonnet</strong></td>
<td><strong>Sonnet (deprecated)</strong></td>
<td style="text-align: center;"></td>
<td style="text-align: center;"></td>
<td>Local file: <code>benchmarks/sonnet.txt</code></td>
@@ -51,6 +51,12 @@ become available.
<td style="text-align: center;"></td>
<td><code>synthetic</code></td>
</tr>
<tr>
<td><strong>Prefix Repetition</strong></td>
<td style="text-align: center;"></td>
<td style="text-align: center;"></td>
<td><code>synthetic</code></td>
</tr>
<tr>
<td><strong>HuggingFace-VisionArena</strong></td>
<td style="text-align: center;"></td>
@@ -592,6 +598,20 @@ python3 benchmarks/benchmark_prefix_caching.py \
--input-length-range 128:256
```
### Prefix Repetition Dataset
```bash
vllm bench serve \
--backend openai \
--model meta-llama/Llama-2-7b-chat-hf \
--dataset-name prefix_repetition \
--num-prompts 100 \
--prefix-repetition-prefix-len 512 \
--prefix-repetition-suffix-len 128 \
--prefix-repetition-num-prefixes 5 \
--prefix-repetition-output-len 128
```
</details>
## ⚡ Example - Request Prioritization Benchmark

View File

@@ -18,9 +18,11 @@ from transformers import (AutoModelForCausalLM, AutoTokenizer,
from vllm.benchmarks.datasets import (AIMODataset, BurstGPTDataset,
ConversationDataset,
InstructCoderDataset, RandomDataset,
SampleRequest, ShareGPTDataset,
SonnetDataset, VisionArenaDataset)
InstructCoderDataset,
PrefixRepetitionRandomDataset,
RandomDataset, SampleRequest,
ShareGPTDataset, SonnetDataset,
VisionArenaDataset)
from vllm.benchmarks.lib.utils import (convert_to_pytorch_benchmark_format,
write_to_json)
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
@@ -327,6 +329,12 @@ def get_requests(args, tokenizer):
dataset_cls = AIMODataset
common_kwargs['dataset_subset'] = None
common_kwargs['dataset_split'] = "train"
elif args.dataset_name == "prefix_repetition":
dataset_cls = PrefixRepetitionRandomDataset
sample_kwargs["prefix_len"] = args.prefix_repetition_prefix_len
sample_kwargs["suffix_len"] = args.prefix_repetition_suffix_len
sample_kwargs["num_prefixes"] = args.prefix_repetition_num_prefixes
sample_kwargs["output_len"] = args.prefix_repetition_output_len
else:
raise ValueError(f"Unknown dataset name: {args.dataset_name}")
# Remove None values
@@ -356,7 +364,11 @@ def validate_args(args):
raise ValueError(f"Unsupported backend: {args.backend}")
# === Dataset Configuration ===
if not args.dataset and not args.dataset_path:
if (
not args.dataset
and not args.dataset_path
and args.dataset_name not in {"prefix_repetition"}
):
print(
"When dataset path is not set, it will default to random dataset")
args.dataset_name = 'random'
@@ -432,7 +444,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
parser.add_argument(
"--dataset-name",
type=str,
choices=["sharegpt", "random", "sonnet", "burstgpt", "hf"],
choices=[
"sharegpt", "random", "sonnet", "burstgpt", "hf",
"prefix_repetition"
],
help="Name of the dataset to benchmark on.",
default="sharegpt")
parser.add_argument(
@@ -521,6 +536,38 @@ def add_cli_args(parser: argparse.ArgumentParser):
default=None,
help="Split of the HF dataset.")
# prefix repetition dataset
prefix_repetition_group = parser.add_argument_group(
"prefix repetition dataset options")
prefix_repetition_group.add_argument(
"--prefix-repetition-prefix-len",
type=int,
default=None,
help="Number of prefix tokens per request, used only for prefix "
"repetition dataset.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-suffix-len",
type=int,
default=None,
help="Number of suffix tokens per request, used only for prefix "
"repetition dataset. Total input length is prefix_len + suffix_len.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-num-prefixes",
type=int,
default=None,
help="Number of prefixes to generate, used only for prefix repetition "
"dataset. Prompts per prefix is num_requests // num_prefixes.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-output-len",
type=int,
default=None,
help="Number of output tokens per request, used only for prefix "
"repetition dataset.",
)
parser = AsyncEngineArgs.add_cli_args(parser)