Add docs for PrefixRepetitionDataset + enable usage with vllm bench throughput (#23012)

Signed-off-by: Seiji Eicher <seiji@anyscale.com>
Co-authored-by: Roger Wang <hey@rogerw.me>
Authored by Seiji Eicher on 2025-08-16 03:21:20 -07:00, committed by GitHub
parent 2dbccce8a6
commit de9cb61763
2 changed files with 73 additions and 6 deletions


@@ -40,7 +40,7 @@ become available.
 <td><code>wget https://github.com/HPMLL/BurstGPT/releases/download/v1.1/BurstGPT_without_fails_2.csv</code></td>
 </tr>
 <tr>
-<td><strong>Sonnet</strong></td>
+<td><strong>Sonnet (deprecated)</strong></td>
 <td style="text-align: center;"></td>
 <td style="text-align: center;"></td>
 <td>Local file: <code>benchmarks/sonnet.txt</code></td>
@@ -51,6 +51,12 @@
 <td style="text-align: center;"></td>
 <td><code>synthetic</code></td>
 </tr>
+<tr>
+<td><strong>Prefix Repetition</strong></td>
+<td style="text-align: center;"></td>
+<td style="text-align: center;"></td>
+<td><code>synthetic</code></td>
+</tr>
 <tr>
 <td><strong>HuggingFace-VisionArena</strong></td>
 <td style="text-align: center;"></td>
@@ -592,6 +598,20 @@ python3 benchmarks/benchmark_prefix_caching.py \
     --input-length-range 128:256
 ```

+### Prefix Repetition Dataset
+
+```bash
+vllm bench serve \
+    --backend openai \
+    --model meta-llama/Llama-2-7b-chat-hf \
+    --dataset-name prefix_repetition \
+    --num-prompts 100 \
+    --prefix-repetition-prefix-len 512 \
+    --prefix-repetition-suffix-len 128 \
+    --prefix-repetition-num-prefixes 5 \
+    --prefix-repetition-output-len 128
+```
+
 </details>

 ## ⚡ Example - Request Prioritization Benchmark
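The documentation added above covers only the online `vllm bench serve` path, while the commit title also wires the dataset into `vllm bench throughput` via the CLI changes in the second file below. As a rough offline counterpart, a sketch (not taken from the docs) is shown here: it reuses the model and request counts from the serve example and assumes the standard throughput flags plus the prefix-repetition options added in this commit.

```bash
# Sketch only: assumes the throughput CLI accepts the prefix-repetition flags
# added in this commit; model name and counts are copied from the serve example.
vllm bench throughput \
    --model meta-llama/Llama-2-7b-chat-hf \
    --dataset-name prefix_repetition \
    --num-prompts 100 \
    --prefix-repetition-prefix-len 512 \
    --prefix-repetition-suffix-len 128 \
    --prefix-repetition-num-prefixes 5 \
    --prefix-repetition-output-len 128
```

`--backend openai` is omitted because the throughput benchmark drives the engine directly rather than an OpenAI-compatible server.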


@@ -18,9 +18,11 @@ from transformers import (AutoModelForCausalLM, AutoTokenizer,
 from vllm.benchmarks.datasets import (AIMODataset, BurstGPTDataset,
                                       ConversationDataset,
-                                      InstructCoderDataset, RandomDataset,
-                                      SampleRequest, ShareGPTDataset,
-                                      SonnetDataset, VisionArenaDataset)
+                                      InstructCoderDataset,
+                                      PrefixRepetitionRandomDataset,
+                                      RandomDataset, SampleRequest,
+                                      ShareGPTDataset, SonnetDataset,
+                                      VisionArenaDataset)
 from vllm.benchmarks.lib.utils import (convert_to_pytorch_benchmark_format,
                                        write_to_json)
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
@@ -327,6 +329,12 @@ def get_requests(args, tokenizer):
         dataset_cls = AIMODataset
         common_kwargs['dataset_subset'] = None
         common_kwargs['dataset_split'] = "train"
+    elif args.dataset_name == "prefix_repetition":
+        dataset_cls = PrefixRepetitionRandomDataset
+        sample_kwargs["prefix_len"] = args.prefix_repetition_prefix_len
+        sample_kwargs["suffix_len"] = args.prefix_repetition_suffix_len
+        sample_kwargs["num_prefixes"] = args.prefix_repetition_num_prefixes
+        sample_kwargs["output_len"] = args.prefix_repetition_output_len
     else:
         raise ValueError(f"Unknown dataset name: {args.dataset_name}")
     # Remove None values
@@ -356,7 +364,11 @@ def validate_args(args):
         raise ValueError(f"Unsupported backend: {args.backend}")

     # === Dataset Configuration ===
-    if not args.dataset and not args.dataset_path:
+    if (
+        not args.dataset
+        and not args.dataset_path
+        and args.dataset_name not in {"prefix_repetition"}
+    ):
         print(
             "When dataset path is not set, it will default to random dataset")
         args.dataset_name = 'random'
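The extra clause matters because `validate_args` would otherwise silently override `--dataset-name prefix_repetition` with the random dataset whenever neither `--dataset` nor `--dataset-path` is given; prefix repetition is synthetic (as the new table row above notes), so selecting it without a dataset path is a valid configuration.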
@@ -432,7 +444,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
     parser.add_argument(
         "--dataset-name",
         type=str,
-        choices=["sharegpt", "random", "sonnet", "burstgpt", "hf"],
+        choices=[
+            "sharegpt", "random", "sonnet", "burstgpt", "hf",
+            "prefix_repetition"
+        ],
         help="Name of the dataset to benchmark on.",
         default="sharegpt")
     parser.add_argument(
@@ -521,6 +536,38 @@ def add_cli_args(parser: argparse.ArgumentParser):
         default=None,
         help="Split of the HF dataset.")

+    # prefix repetition dataset
+    prefix_repetition_group = parser.add_argument_group(
+        "prefix repetition dataset options")
+    prefix_repetition_group.add_argument(
+        "--prefix-repetition-prefix-len",
+        type=int,
+        default=None,
+        help="Number of prefix tokens per request, used only for prefix "
+        "repetition dataset.",
+    )
+    prefix_repetition_group.add_argument(
+        "--prefix-repetition-suffix-len",
+        type=int,
+        default=None,
+        help="Number of suffix tokens per request, used only for prefix "
+        "repetition dataset. Total input length is prefix_len + suffix_len.",
+    )
+    prefix_repetition_group.add_argument(
+        "--prefix-repetition-num-prefixes",
+        type=int,
+        default=None,
+        help="Number of prefixes to generate, used only for prefix repetition "
+        "dataset. Prompts per prefix is num_requests // num_prefixes.",
+    )
+    prefix_repetition_group.add_argument(
+        "--prefix-repetition-output-len",
+        type=int,
+        default=None,
+        help="Number of output tokens per request, used only for prefix "
+        "repetition dataset.",
+    )
+
     parser = AsyncEngineArgs.add_cli_args(parser)
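As a quick check on the help-string arithmetic above, using the values from the README example: the total input length per request is prefix_len + suffix_len = 512 + 128 = 640 tokens, and with `--num-prompts 100` and `--prefix-repetition-num-prefixes 5`, each shared prefix is reused for 100 // 5 = 20 prompts.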