mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 23:45:54 +08:00
Add docs for PrefixRepetitionDataset + enable usage with vllm bench throughput (#23012)
Signed-off-by: Seiji Eicher <seiji@anyscale.com>
Co-authored-by: Roger Wang <hey@rogerw.me>
This commit is contained in:
parent
2dbccce8a6
commit
de9cb61763
@ -40,7 +40,7 @@ become available.
|
||||
<td><code>wget https://github.com/HPMLL/BurstGPT/releases/download/v1.1/BurstGPT_without_fails_2.csv</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Sonnet</strong></td>
|
||||
<td><strong>Sonnet (deprecated)</strong></td>
|
||||
<td style="text-align: center;">✅</td>
|
||||
<td style="text-align: center;">✅</td>
|
||||
<td>Local file: <code>benchmarks/sonnet.txt</code></td>
|
||||
@ -51,6 +51,12 @@ become available.
|
||||
<td style="text-align: center;">✅</td>
|
||||
<td><code>synthetic</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Prefix Repetition</strong></td>
|
||||
<td style="text-align: center;">✅</td>
|
||||
<td style="text-align: center;">✅</td>
|
||||
<td><code>synthetic</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>HuggingFace-VisionArena</strong></td>
|
||||
<td style="text-align: center;">✅</td>
|
||||
@ -592,6 +598,20 @@ python3 benchmarks/benchmark_prefix_caching.py \
|
||||
--input-length-range 128:256
|
||||
```
|
||||
|
||||
### Prefix Repetition Dataset
|
||||
|
||||
```bash
|
||||
vllm bench serve \
|
||||
--backend openai \
|
||||
--model meta-llama/Llama-2-7b-chat-hf \
|
||||
--dataset-name prefix_repetition \
|
||||
--num-prompts 100 \
|
||||
--prefix-repetition-prefix-len 512 \
|
||||
--prefix-repetition-suffix-len 128 \
|
||||
--prefix-repetition-num-prefixes 5 \
|
||||
--prefix-repetition-output-len 128
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## ⚡ Example - Request Prioritization Benchmark
|
||||
|
||||
@ -18,9 +18,11 @@ from transformers import (AutoModelForCausalLM, AutoTokenizer,
|
||||
|
||||
from vllm.benchmarks.datasets import (AIMODataset, BurstGPTDataset,
|
||||
ConversationDataset,
|
||||
InstructCoderDataset, RandomDataset,
|
||||
SampleRequest, ShareGPTDataset,
|
||||
SonnetDataset, VisionArenaDataset)
|
||||
InstructCoderDataset,
|
||||
PrefixRepetitionRandomDataset,
|
||||
RandomDataset, SampleRequest,
|
||||
ShareGPTDataset, SonnetDataset,
|
||||
VisionArenaDataset)
|
||||
from vllm.benchmarks.lib.utils import (convert_to_pytorch_benchmark_format,
|
||||
write_to_json)
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
||||
@ -327,6 +329,12 @@ def get_requests(args, tokenizer):
|
||||
dataset_cls = AIMODataset
|
||||
common_kwargs['dataset_subset'] = None
|
||||
common_kwargs['dataset_split'] = "train"
|
||||
elif args.dataset_name == "prefix_repetition":
|
||||
dataset_cls = PrefixRepetitionRandomDataset
|
||||
sample_kwargs["prefix_len"] = args.prefix_repetition_prefix_len
|
||||
sample_kwargs["suffix_len"] = args.prefix_repetition_suffix_len
|
||||
sample_kwargs["num_prefixes"] = args.prefix_repetition_num_prefixes
|
||||
sample_kwargs["output_len"] = args.prefix_repetition_output_len
|
||||
else:
|
||||
raise ValueError(f"Unknown dataset name: {args.dataset_name}")
|
||||
# Remove None values
|
||||
@ -356,7 +364,11 @@ def validate_args(args):
|
||||
raise ValueError(f"Unsupported backend: {args.backend}")
|
||||
|
||||
# === Dataset Configuration ===
|
||||
if not args.dataset and not args.dataset_path:
|
||||
if (
|
||||
not args.dataset
|
||||
and not args.dataset_path
|
||||
and args.dataset_name not in {"prefix_repetition"}
|
||||
):
|
||||
print(
|
||||
"When dataset path is not set, it will default to random dataset")
|
||||
args.dataset_name = 'random'
|
||||
@ -432,7 +444,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
||||
parser.add_argument(
|
||||
"--dataset-name",
|
||||
type=str,
|
||||
choices=["sharegpt", "random", "sonnet", "burstgpt", "hf"],
|
||||
choices=[
|
||||
"sharegpt", "random", "sonnet", "burstgpt", "hf",
|
||||
"prefix_repetition"
|
||||
],
|
||||
help="Name of the dataset to benchmark on.",
|
||||
default="sharegpt")
|
||||
parser.add_argument(
|
||||
@ -521,6 +536,38 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
||||
default=None,
|
||||
help="Split of the HF dataset.")
|
||||
|
||||
# prefix repetition dataset
|
||||
prefix_repetition_group = parser.add_argument_group(
|
||||
"prefix repetition dataset options")
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-prefix-len",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Number of prefix tokens per request, used only for prefix "
|
||||
"repetition dataset.",
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-suffix-len",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Number of suffix tokens per request, used only for prefix "
|
||||
"repetition dataset. Total input length is prefix_len + suffix_len.",
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-num-prefixes",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Number of prefixes to generate, used only for prefix repetition "
|
||||
"dataset. Prompts per prefix is num_requests // num_prefixes.",
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-output-len",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Number of output tokens per request, used only for prefix "
|
||||
"repetition dataset.",
|
||||
)
|
||||
|
||||
parser = AsyncEngineArgs.add_cli_args(parser)
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user