mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 09:35:50 +08:00
[Benchmark][Doc] Update throughput benchmark and README (#15998)
Signed-off-by: StevenShi-23 <shi.ziji.sm@gmail.com> Signed-off-by: Roger Wang <ywang@roblox.com> Co-authored-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
parent
230b131b54
commit
95862f7b4d
@ -51,6 +51,12 @@ become available.
|
|||||||
<td style="text-align: center;">✅</td>
|
<td style="text-align: center;">✅</td>
|
||||||
<td style="text-align: center;">✅</td>
|
<td style="text-align: center;">✅</td>
|
||||||
<td><code>likaixin/InstructCoder</code></td>
|
<td><code>likaixin/InstructCoder</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><strong>HuggingFace-AIMO</strong></td>
|
||||||
|
<td style="text-align: center;">✅</td>
|
||||||
|
<td style="text-align: center;">✅</td>
|
||||||
|
<td><code>AI-MO/aimo-validation-aime</code>, <code>AI-MO/NuminaMath-1.5</code>, <code>AI-MO/NuminaMath-CoT</code></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><strong>HuggingFace-Other</strong></td>
|
<td><strong>HuggingFace-Other</strong></td>
|
||||||
@ -187,6 +193,17 @@ python3 vllm/benchmarks/benchmark_serving.py \
|
|||||||
--num-prompts 10
|
--num-prompts 10
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**`AI-MO/aimo-validation-aime`**
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
python3 vllm/benchmarks/benchmark_serving.py \
|
||||||
|
--model Qwen/QwQ-32B \
|
||||||
|
--dataset-name hf \
|
||||||
|
--dataset-path AI-MO/aimo-validation-aime \
|
||||||
|
--num-prompts 10 \
|
||||||
|
--seed 42
|
||||||
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
## Example - Offline Throughput Benchmark
|
## Example - Offline Throughput Benchmark
|
||||||
|
|
||||||
@ -278,6 +295,18 @@ python3 vllm/benchmarks/benchmark_throughput.py \
|
|||||||
--num-prompts 10
|
--num-prompts 10
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**`AI-MO/aimo-validation-aime`**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 vllm/benchmarks/benchmark_throughput.py \
|
||||||
|
--model Qwen/QwQ-32B \
|
||||||
|
--backend vllm \
|
||||||
|
--dataset-name hf \
|
||||||
|
--dataset-path AI-MO/aimo-validation-aime \
|
||||||
|
--hf-split train \
|
||||||
|
--num-prompts 10
|
||||||
|
```
|
||||||
|
|
||||||
### Benchmark with LoRA Adapters
|
### Benchmark with LoRA Adapters
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
|
|||||||
@ -11,10 +11,10 @@ from typing import Any, Optional, Union
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
import uvloop
|
import uvloop
|
||||||
from benchmark_dataset import (BurstGPTDataset, ConversationDataset,
|
from benchmark_dataset import (AIMODataset, BurstGPTDataset,
|
||||||
InstructCoderDataset, RandomDataset,
|
ConversationDataset, InstructCoderDataset,
|
||||||
SampleRequest, ShareGPTDataset, SonnetDataset,
|
RandomDataset, SampleRequest, ShareGPTDataset,
|
||||||
VisionArenaDataset)
|
SonnetDataset, VisionArenaDataset)
|
||||||
from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
|
from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from transformers import (AutoModelForCausalLM, AutoTokenizer,
|
from transformers import (AutoModelForCausalLM, AutoTokenizer,
|
||||||
@ -332,7 +332,10 @@ def get_requests(args, tokenizer):
|
|||||||
common_kwargs['dataset_subset'] = args.hf_subset
|
common_kwargs['dataset_subset'] = args.hf_subset
|
||||||
common_kwargs['dataset_split'] = args.hf_split
|
common_kwargs['dataset_split'] = args.hf_split
|
||||||
sample_kwargs["enable_multimodal_chat"] = True
|
sample_kwargs["enable_multimodal_chat"] = True
|
||||||
|
elif args.dataset_path in AIMODataset.SUPPORTED_DATASET_PATHS:
|
||||||
|
dataset_cls = AIMODataset
|
||||||
|
common_kwargs['dataset_subset'] = None
|
||||||
|
common_kwargs['dataset_split'] = "train"
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown dataset name: {args.dataset_name}")
|
raise ValueError(f"Unknown dataset name: {args.dataset_name}")
|
||||||
# Remove None values
|
# Remove None values
|
||||||
@ -467,12 +470,13 @@ def validate_args(args):
|
|||||||
since --dataset-name is not 'hf'.",
|
since --dataset-name is not 'hf'.",
|
||||||
stacklevel=2)
|
stacklevel=2)
|
||||||
elif args.dataset_name == "hf":
|
elif args.dataset_name == "hf":
|
||||||
if args.dataset_path in VisionArenaDataset.SUPPORTED_DATASET_PATHS:
|
if args.dataset_path in (
|
||||||
assert args.backend == "vllm-chat", "VisionArenaDataset needs to use vllm-chat as the backend." #noqa: E501
|
VisionArenaDataset.SUPPORTED_DATASET_PATHS.keys()
|
||||||
elif args.dataset_path in InstructCoderDataset.SUPPORTED_DATASET_PATHS:
|
| ConversationDataset.SUPPORTED_DATASET_PATHS):
|
||||||
assert args.backend == "vllm", "InstructCoder dataset needs to use vllm as the backend." #noqa: E501
|
assert args.backend == "vllm-chat", f"{args.dataset_path} needs to use vllm-chat as the backend." #noqa: E501
|
||||||
elif args.dataset_path in ConversationDataset.SUPPORTED_DATASET_PATHS:
|
elif args.dataset_path in (InstructCoderDataset.SUPPORTED_DATASET_PATHS
|
||||||
assert args.backend == "vllm-chat", "ConversationDataset needs to use vllm-chat as the backend." #noqa: E501
|
| AIMODataset.SUPPORTED_DATASET_PATHS):
|
||||||
|
assert args.backend == "vllm", f"{args.dataset_path} needs to use vllm as the backend." #noqa: E501
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"{args.dataset_path} is not supported by hf dataset.")
|
f"{args.dataset_path} is not supported by hf dataset.")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user