mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-19 08:47:08 +08:00
add random-mm, random-rerank
This commit is contained in:
parent
3b2596d5a2
commit
9b0d1aa277
@ -1436,6 +1436,75 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
|
||||
help="Maximum distance for blazedit dataset. Min: 0, Max: 1.0",
|
||||
)
|
||||
|
||||
# Add random dataset arguments (random-mm and random-rerank)
|
||||
add_random_dataset_args(parser)
|
||||
|
||||
hf_group = parser.add_argument_group("hf dataset options")
|
||||
hf_group.add_argument(
|
||||
"--hf-subset", type=str, default=None, help="Subset of the HF dataset."
|
||||
)
|
||||
hf_group.add_argument(
|
||||
"--hf-split", type=str, default=None, help="Split of the HF dataset."
|
||||
)
|
||||
hf_group.add_argument(
|
||||
"--hf-name",
|
||||
type=str,
|
||||
default=None,
|
||||
help=(
|
||||
"Name of the dataset on HuggingFace "
|
||||
"(e.g., 'lmarena-ai/VisionArena-Chat'). "
|
||||
"Specify this if your dataset-path is a local path."
|
||||
),
|
||||
)
|
||||
hf_group.add_argument(
|
||||
"--hf-output-len",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Output length for each request. Overrides the output lengths "
|
||||
"from the sampled HF dataset.",
|
||||
)
|
||||
|
||||
prefix_repetition_group = parser.add_argument_group(
|
||||
"prefix repetition dataset options"
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-prefix-len",
|
||||
type=int,
|
||||
default=256,
|
||||
help="Number of prefix tokens per request, used only for prefix "
|
||||
"repetition dataset.",
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-suffix-len",
|
||||
type=int,
|
||||
default=256,
|
||||
help="Number of suffix tokens per request, used only for prefix "
|
||||
"repetition dataset. Total input length is prefix_len + suffix_len.",
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-num-prefixes",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Number of prefixes to generate, used only for prefix repetition "
|
||||
"dataset. Prompts per prefix is num_requests // num_prefixes.",
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-output-len",
|
||||
type=int,
|
||||
default=128,
|
||||
help="Number of output tokens per request, used only for prefix "
|
||||
"repetition dataset.",
|
||||
)
|
||||
|
||||
def add_random_dataset_args(parser: FlexibleArgumentParser) -> None:
|
||||
"""Add CLI arguments for random multimodal and random reranking datasets.
|
||||
|
||||
This function adds arguments needed for:
|
||||
- random-mm (random multimodal dataset)
|
||||
- random-rerank (random dataset for reranking)
|
||||
|
||||
It can be called directly by benchmark scripts or by add_dataset_parser.
|
||||
"""
|
||||
random_group = parser.add_argument_group("random dataset options")
|
||||
random_group.add_argument(
|
||||
"--random-input-len",
|
||||
@ -1580,63 +1649,6 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
|
||||
),
|
||||
)
|
||||
|
||||
hf_group = parser.add_argument_group("hf dataset options")
|
||||
hf_group.add_argument(
|
||||
"--hf-subset", type=str, default=None, help="Subset of the HF dataset."
|
||||
)
|
||||
hf_group.add_argument(
|
||||
"--hf-split", type=str, default=None, help="Split of the HF dataset."
|
||||
)
|
||||
hf_group.add_argument(
|
||||
"--hf-name",
|
||||
type=str,
|
||||
default=None,
|
||||
help=(
|
||||
"Name of the dataset on HuggingFace "
|
||||
"(e.g., 'lmarena-ai/VisionArena-Chat'). "
|
||||
"Specify this if your dataset-path is a local path."
|
||||
),
|
||||
)
|
||||
hf_group.add_argument(
|
||||
"--hf-output-len",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Output length for each request. Overrides the output lengths "
|
||||
"from the sampled HF dataset.",
|
||||
)
|
||||
|
||||
prefix_repetition_group = parser.add_argument_group(
|
||||
"prefix repetition dataset options"
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-prefix-len",
|
||||
type=int,
|
||||
default=256,
|
||||
help="Number of prefix tokens per request, used only for prefix "
|
||||
"repetition dataset.",
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-suffix-len",
|
||||
type=int,
|
||||
default=256,
|
||||
help="Number of suffix tokens per request, used only for prefix "
|
||||
"repetition dataset. Total input length is prefix_len + suffix_len.",
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-num-prefixes",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Number of prefixes to generate, used only for prefix repetition "
|
||||
"dataset. Prompts per prefix is num_requests // num_prefixes.",
|
||||
)
|
||||
prefix_repetition_group.add_argument(
|
||||
"--prefix-repetition-output-len",
|
||||
type=int,
|
||||
default=128,
|
||||
help="Number of output tokens per request, used only for prefix "
|
||||
"repetition dataset.",
|
||||
)
|
||||
|
||||
|
||||
def get_samples(args, tokenizer: TokenizerLike) -> list[SampleRequest]:
|
||||
if not hasattr(args, "request_id_prefix"):
|
||||
|
||||
@ -3,32 +3,33 @@
|
||||
r"""Benchmark multimodal processor latency.
|
||||
|
||||
This benchmark measures the latency of the mm processor module
|
||||
using randomly generated multimodal prompts with synthetic images.
|
||||
using multimodal prompts from datasets.
|
||||
MM processor stats are automatically enabled.
|
||||
|
||||
Run:
|
||||
vllm bench mm-processor \
|
||||
--model <your_model> \
|
||||
--dataset-name random-mm \
|
||||
--num-prompts 10 \
|
||||
--input-len 1024 \
|
||||
--output-len 128 \
|
||||
--num-images 1
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import dataclasses
|
||||
import json
|
||||
import time
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from vllm.benchmarks.throughput import get_requests
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
from vllm.multimodal.processing import (
|
||||
get_timing_stats_from_engine_client,
|
||||
)
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
from vllm.utils.gc_utils import freeze_gc_heap
|
||||
from vllm.utils.import_utils import PlaceholderModule
|
||||
|
||||
@ -37,22 +38,6 @@ try:
|
||||
except ImportError:
|
||||
pd = PlaceholderModule("pandas")
|
||||
|
||||
|
||||
@dataclass
|
||||
class MultimodalProcessorBenchmarkMetrics:
|
||||
"""Metrics for multimodal processor benchmark."""
|
||||
|
||||
completed: int
|
||||
failed: int
|
||||
mean_e2el_ms: float
|
||||
median_e2el_ms: float
|
||||
std_e2el_ms: float
|
||||
percentiles_e2el_ms: list[tuple[float, float]]
|
||||
|
||||
"""Per-stage timing stats: mean, median, std, percentiles for each stage."""
|
||||
mm_processor_stats: dict[str, dict[str, float]]
|
||||
|
||||
|
||||
def collect_mm_processor_stats(
|
||||
llm_engine: Any,
|
||||
) -> dict[str, list[float]]:
|
||||
@ -118,54 +103,56 @@ def calculate_mm_processor_metrics(
|
||||
return metrics
|
||||
|
||||
|
||||
def generate_random_multimodal_prompts(
|
||||
num_prompts: int,
|
||||
input_len: int,
|
||||
output_len: int,
|
||||
tokenizer: Any,
|
||||
num_images: int = 1,
|
||||
image_width: int = 256,
|
||||
image_height: int = 256,
|
||||
seed: int = 0,
|
||||
) -> tuple[list[list[dict]], list[int]]:
|
||||
def validate_args(args):
|
||||
"""
|
||||
Generate random multimodal prompts with synthetic images and text tokens.
|
||||
|
||||
Returns:
|
||||
tuple: (prompts, expected_output_lens)
|
||||
- prompts: List of OpenAI chat format messages with text and images
|
||||
- expected_output_lens: List of expected output lengths
|
||||
Validate command-line arguments for mm_processor benchmark.
|
||||
"""
|
||||
from PIL import Image
|
||||
if not getattr(args, "tokenizer", None):
|
||||
args.tokenizer = args.model
|
||||
|
||||
from vllm.benchmarks.datasets import process_image
|
||||
if getattr(args, "dataset", None) is not None:
|
||||
warnings.warn(
|
||||
"The '--dataset' argument will be deprecated in the next release. "
|
||||
"Please use '--dataset-name' and '--dataset-path' instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
args.dataset_path = args.dataset
|
||||
|
||||
rng = np.random.default_rng(seed)
|
||||
if not hasattr(args, "dataset_path"):
|
||||
args.dataset_path = None
|
||||
if not hasattr(args, "data_parallel_size"):
|
||||
args.data_parallel_size = 1
|
||||
if not hasattr(args, "lora_path"):
|
||||
args.lora_path = None
|
||||
if not hasattr(args, "max_loras"):
|
||||
args.max_loras = None
|
||||
|
||||
prompts = []
|
||||
expected_output_lens = []
|
||||
# === Random Dataset Argument Conflict Detection ===
|
||||
# Check for conflicts between regular and random arguments when using random datasets
|
||||
dataset_name = getattr(args, "dataset_name", None)
|
||||
if dataset_name in {"random", "random-mm", "random-rerank"}:
|
||||
random_input_len = getattr(args, "random_input_len", None)
|
||||
random_output_len = getattr(args, "random_output_len", None)
|
||||
random_prefix_len = getattr(args, "random_prefix_len", None)
|
||||
input_len = getattr(args, "input_len", None)
|
||||
output_len = getattr(args, "output_len", None)
|
||||
prefix_len = getattr(args, "prefix_len", None)
|
||||
|
||||
for i in range(num_prompts):
|
||||
vocab_size = tokenizer.vocab_size
|
||||
prompt_token_ids = rng.integers(0, vocab_size, size=input_len).tolist()
|
||||
|
||||
text_prompt = tokenizer.decode(prompt_token_ids)
|
||||
|
||||
mm_items = []
|
||||
for _ in range(num_images):
|
||||
random_pixels = rng.integers(
|
||||
0, 256, (image_height, image_width, 3), dtype=np.uint8
|
||||
if input_len is not None and random_input_len is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify both --input-len and --random-input-len. "
|
||||
"For random datasets, use only one of them (prefer --random-input-len)."
|
||||
)
|
||||
if output_len is not None and random_output_len is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify both --output-len and --random-output-len. "
|
||||
"For random datasets, use only one of them (prefer --random-output-len)."
|
||||
)
|
||||
if prefix_len is not None and random_prefix_len is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify both --prefix-len and --random-prefix-len. "
|
||||
"For random datasets, use only one of them (prefer --random-prefix-len)."
|
||||
)
|
||||
image = Image.fromarray(random_pixels)
|
||||
mm_item = process_image(image)
|
||||
mm_items.append(mm_item)
|
||||
|
||||
content = [{"type": "text", "text": text_prompt}]
|
||||
content.extend(mm_items)
|
||||
prompts.append([{"role": "user", "content": content}])
|
||||
expected_output_lens.append(output_len)
|
||||
|
||||
return prompts, expected_output_lens
|
||||
|
||||
|
||||
def benchmark_multimodal_processor(
|
||||
@ -176,28 +163,33 @@ def benchmark_multimodal_processor(
|
||||
"""
|
||||
from vllm import LLM, SamplingParams
|
||||
|
||||
validate_args(args)
|
||||
|
||||
if args.seed is None:
|
||||
args.seed = 0
|
||||
|
||||
tokenizer = get_tokenizer(
|
||||
args.tokenizer,
|
||||
tokenizer_mode=getattr(args, "tokenizer_mode", "auto"),
|
||||
trust_remote_code=getattr(args, "trust_remote_code", False),
|
||||
)
|
||||
|
||||
requests = get_requests(args, tokenizer)
|
||||
|
||||
engine_args = EngineArgs.from_cli_args(args)
|
||||
llm = LLM(**dataclasses.asdict(engine_args))
|
||||
|
||||
assert llm.llm_engine.model_config.max_model_len >= (
|
||||
args.input_len + args.output_len
|
||||
assert all(
|
||||
llm.llm_engine.model_config.max_model_len
|
||||
>= (request.prompt_len + request.expected_output_len)
|
||||
for request in requests
|
||||
), (
|
||||
"Please ensure that max_model_len is greater than "
|
||||
"the sum of input_len and output_len."
|
||||
"Please ensure that max_model_len is greater than the sum of "
|
||||
"prompt_len and expected_output_len for all requests."
|
||||
)
|
||||
|
||||
seed = getattr(args, "seed", 0)
|
||||
tokenizer = llm.get_tokenizer()
|
||||
prompts, expected_output_lens = generate_random_multimodal_prompts(
|
||||
num_prompts=args.num_prompts,
|
||||
input_len=args.input_len,
|
||||
output_len=args.output_len,
|
||||
tokenizer=tokenizer,
|
||||
num_images=args.num_images,
|
||||
image_width=args.image_width,
|
||||
image_height=args.image_height,
|
||||
seed=seed,
|
||||
)
|
||||
prompts = [request.prompt for request in requests]
|
||||
expected_output_lens = [request.expected_output_len for request in requests]
|
||||
|
||||
sampling_params = [
|
||||
SamplingParams(
|
||||
@ -297,42 +289,32 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None:
|
||||
|
||||
parser.set_defaults(enable_mm_processor_stats=True)
|
||||
|
||||
parser.add_argument(
|
||||
"--dataset-name",
|
||||
type=str,
|
||||
default="random-mm",
|
||||
help="Name of the dataset to benchmark on. Defaults to 'random-mm'.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--prefix-len",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Number of fixed prefix tokens before the random context in a request. "
|
||||
"For random datasets (random, random-mm, random-rerank), either this "
|
||||
"argument or the corresponding --random-prefix-len argument can be used, "
|
||||
"but not both.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num-prompts",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Number of prompts to process.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--input-len",
|
||||
type=int,
|
||||
default=1024,
|
||||
help="Number of input tokens per request.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-len",
|
||||
type=int,
|
||||
default=128,
|
||||
help="Number of output tokens per request.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num-images",
|
||||
type=int,
|
||||
default=1,
|
||||
help="Number of images per prompt.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--image-width",
|
||||
type=int,
|
||||
default=256,
|
||||
help="Width of generated images in pixels.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--image-height",
|
||||
type=int,
|
||||
default=256,
|
||||
help="Height of generated images in pixels.",
|
||||
)
|
||||
|
||||
from vllm.benchmarks.datasets import add_random_dataset_args
|
||||
|
||||
# (random, random-mm, random-rerank)
|
||||
add_random_dataset_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
"--output-json",
|
||||
@ -414,11 +396,8 @@ def main(args: argparse.Namespace) -> None:
|
||||
result["config"] = {
|
||||
"model": args.model,
|
||||
"num_prompts": args.num_prompts,
|
||||
"input_len": args.input_len,
|
||||
"output_len": args.output_len,
|
||||
"num_images": args.num_images,
|
||||
"image_width": args.image_width,
|
||||
"image_height": args.image_height,
|
||||
"input_len": getattr(args, "random_input_len", None),
|
||||
"output_len": getattr(args, "random_output_len", None),
|
||||
}
|
||||
result["timestamp"] = datetime.now().isoformat()
|
||||
|
||||
|
||||
@ -24,10 +24,13 @@ from vllm.benchmarks.datasets import (
|
||||
MultiModalConversationDataset,
|
||||
PrefixRepetitionRandomDataset,
|
||||
RandomDataset,
|
||||
RandomDatasetForReranking,
|
||||
RandomMultiModalDataset,
|
||||
SampleRequest,
|
||||
ShareGPTDataset,
|
||||
SonnetDataset,
|
||||
VisionArenaDataset,
|
||||
add_random_dataset_args,
|
||||
)
|
||||
from vllm.benchmarks.lib.utils import convert_to_pytorch_benchmark_format, write_to_json
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
||||
@ -351,7 +354,13 @@ def get_requests(args, tokenizer):
|
||||
and args.dataset_name not in {"prefix_repetition", "random-mm", "random-rerank"}
|
||||
):
|
||||
sample_kwargs["range_ratio"] = args.random_range_ratio
|
||||
sample_kwargs["prefix_len"] = args.prefix_len
|
||||
# prefer random_* arguments, fall back to regular arguments
|
||||
random_prefix_len = getattr(args, "random_prefix_len", None)
|
||||
sample_kwargs["prefix_len"] = random_prefix_len if random_prefix_len is not None else args.prefix_len
|
||||
random_input_len = getattr(args, "random_input_len", None)
|
||||
sample_kwargs["input_len"] = random_input_len if random_input_len is not None else args.input_len
|
||||
random_output_len = getattr(args, "random_output_len", None)
|
||||
sample_kwargs["output_len"] = random_output_len if random_output_len is not None else args.output_len
|
||||
dataset_cls = RandomDataset
|
||||
elif args.dataset_name == "sharegpt":
|
||||
dataset_cls = ShareGPTDataset
|
||||
@ -395,6 +404,39 @@ def get_requests(args, tokenizer):
|
||||
sample_kwargs["suffix_len"] = args.prefix_repetition_suffix_len
|
||||
sample_kwargs["num_prefixes"] = args.prefix_repetition_num_prefixes
|
||||
sample_kwargs["output_len"] = args.prefix_repetition_output_len
|
||||
elif args.dataset_name == "random-mm":
|
||||
dataset_cls = RandomMultiModalDataset
|
||||
# prefer random_* arguments, fall back to regular arguments
|
||||
random_input_len = getattr(args, "random_input_len", None)
|
||||
sample_kwargs["input_len"] = random_input_len if random_input_len is not None else getattr(args, "input_len", None)
|
||||
random_output_len = getattr(args, "random_output_len", None)
|
||||
sample_kwargs["output_len"] = random_output_len if random_output_len is not None else getattr(args, "output_len", None)
|
||||
sample_kwargs["base_items_per_request"] = getattr(
|
||||
args, "random_mm_base_items_per_request", None
|
||||
)
|
||||
sample_kwargs["num_mm_items_range_ratio"] = getattr(
|
||||
args, "random_mm_num_mm_items_range_ratio", None
|
||||
)
|
||||
sample_kwargs["limit_mm_per_prompt"] = getattr(
|
||||
args, "random_mm_limit_mm_per_prompt", None
|
||||
)
|
||||
sample_kwargs["bucket_config"] = getattr(
|
||||
args, "random_mm_bucket_config", None
|
||||
)
|
||||
sample_kwargs["enable_multimodal_chat"] = True
|
||||
random_prefix_len = getattr(args, "random_prefix_len", None)
|
||||
sample_kwargs["prefix_len"] = random_prefix_len if random_prefix_len is not None else args.prefix_len
|
||||
sample_kwargs["range_ratio"] = args.random_range_ratio
|
||||
elif args.dataset_name == "random-rerank":
|
||||
dataset_cls = RandomDatasetForReranking
|
||||
# prefer random_* arguments, fall back to regular arguments
|
||||
random_input_len = getattr(args, "random_input_len", None)
|
||||
sample_kwargs["input_len"] = random_input_len if random_input_len is not None else getattr(args, "input_len", None)
|
||||
random_output_len = getattr(args, "random_output_len", None)
|
||||
sample_kwargs["output_len"] = random_output_len if random_output_len is not None else getattr(args, "output_len", None)
|
||||
sample_kwargs["batchsize"] = getattr(args, "random_batch_size", 1)
|
||||
sample_kwargs["is_reranker"] = not getattr(args, "no_reranker", False)
|
||||
sample_kwargs["range_ratio"] = args.random_range_ratio
|
||||
else:
|
||||
raise ValueError(f"Unknown dataset name: {args.dataset_name}")
|
||||
# Remove None values
|
||||
@ -451,8 +493,11 @@ def validate_args(args):
|
||||
):
|
||||
print("When dataset path is not set, it will default to random dataset")
|
||||
args.dataset_name = "random"
|
||||
if args.input_len is None:
|
||||
raise ValueError("input_len must be provided for a random dataset")
|
||||
random_input_len = getattr(args, "random_input_len", None)
|
||||
if args.input_len is None and random_input_len is None:
|
||||
raise ValueError(
|
||||
"Either --input-len or --random-input-len must be provided for a random dataset"
|
||||
)
|
||||
|
||||
# === Dataset Name Specific Checks ===
|
||||
# --hf-subset and --hf-split: only used
|
||||
@ -485,26 +530,66 @@ def validate_args(args):
|
||||
else:
|
||||
raise ValueError(f"{args.dataset_path} is not supported by hf dataset.")
|
||||
|
||||
# --random-range-ratio: only used when dataset_name is 'random'
|
||||
if args.dataset_name != "random" and args.random_range_ratio is not None:
|
||||
# --random-range-ratio: only used when dataset_name is 'random', 'random-mm', or 'random-rerank'
|
||||
if args.dataset_name not in {"random", "random-mm", "random-rerank"} and args.random_range_ratio is not None:
|
||||
warnings.warn(
|
||||
"--random-range-ratio will be ignored since \
|
||||
--dataset-name is not 'random'.",
|
||||
--dataset-name is not 'random', 'random-mm', or 'random-rerank'.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
# --prefix-len: only used when dataset_name is 'random', 'sonnet', or not
|
||||
# --random-batch-size: only used when dataset_name is 'random-rerank'
|
||||
if args.dataset_name != "random-rerank" and getattr(args, "random_batch_size", None) is not None:
|
||||
if args.random_batch_size != 1:
|
||||
warnings.warn(
|
||||
"--random-batch-size will be ignored since \
|
||||
--dataset-name is not 'random-rerank'.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
# --no-reranker: only used when dataset_name is 'random-rerank'
|
||||
if args.dataset_name != "random-rerank" and getattr(args, "no_reranker", False):
|
||||
warnings.warn(
|
||||
"--no-reranker will be ignored since \
|
||||
--dataset-name is not 'random-rerank'.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
# --prefix-len: only used when dataset_name is 'random', 'random-mm', 'sonnet', or not
|
||||
# set.
|
||||
if (
|
||||
args.dataset_name not in {"random", "sonnet", None}
|
||||
args.dataset_name not in {"random", "random-mm", "sonnet", None}
|
||||
and args.prefix_len is not None
|
||||
):
|
||||
warnings.warn(
|
||||
"--prefix-len will be ignored since --dataset-name\
|
||||
is not 'random', 'sonnet', or not set.",
|
||||
is not 'random', 'random-mm', 'sonnet', or not set.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
# === Random Dataset Argument Conflict Detection ===
|
||||
# Check for conflicts between regular and random arguments when using random datasets
|
||||
if args.dataset_name in {"random", "random-mm", "random-rerank"}:
|
||||
random_input_len = getattr(args, "random_input_len", None)
|
||||
random_output_len = getattr(args, "random_output_len", None)
|
||||
random_prefix_len = getattr(args, "random_prefix_len", None)
|
||||
|
||||
if args.input_len is not None and random_input_len is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify both --input-len and --random-input-len. "
|
||||
"For random datasets, use only one of them (prefer --random-input-len)."
|
||||
)
|
||||
if args.output_len is not None and random_output_len is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify both --output-len and --random-output-len. "
|
||||
"For random datasets, use only one of them (prefer --random-output-len)."
|
||||
)
|
||||
if args.prefix_len is not None and random_prefix_len is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify both --prefix-len and --random-prefix-len. "
|
||||
"For random datasets, use only one of them (prefer --random-prefix-len)."
|
||||
)
|
||||
|
||||
# === LoRA Settings ===
|
||||
if getattr(args, "enable_lora", False) and args.backend != "vllm":
|
||||
raise ValueError("LoRA benchmarking is only supported for vLLM backend")
|
||||
@ -554,7 +639,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
||||
parser.add_argument(
|
||||
"--dataset-name",
|
||||
type=str,
|
||||
choices=["sharegpt", "random", "sonnet", "burstgpt", "hf", "prefix_repetition"],
|
||||
choices=["sharegpt", "random", "sonnet", "burstgpt", "hf", "prefix_repetition", "random-mm", "random-rerank"],
|
||||
help="Name of the dataset to benchmark on.",
|
||||
default="sharegpt",
|
||||
)
|
||||
@ -574,14 +659,20 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
||||
"--input-len",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Input prompt length for each request",
|
||||
help="Input prompt length for each request. "
|
||||
"For random datasets (random, random-mm, random-rerank), either this "
|
||||
"argument or the corresponding --random-input-len argument can be used, "
|
||||
"but not both.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-len",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Output length for each request. Overrides the "
|
||||
"output length from the dataset.",
|
||||
"output length from the dataset. "
|
||||
"For random datasets (random, random-mm, random-rerank), either this "
|
||||
"argument or the corresponding --random-output-len argument can be used, "
|
||||
"but not both.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--n", type=int, default=1, help="Number of generated sequences per prompt."
|
||||
@ -634,17 +725,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
||||
type=int,
|
||||
default=0,
|
||||
help="Number of fixed prefix tokens before the random "
|
||||
"context in a request (default: 0).",
|
||||
)
|
||||
# random dataset
|
||||
parser.add_argument(
|
||||
"--random-range-ratio",
|
||||
type=float,
|
||||
default=0.0,
|
||||
help="Range ratio for sampling input/output length, "
|
||||
"used only for RandomDataset. Must be in the range [0, 1) to define "
|
||||
"a symmetric sampling range "
|
||||
"[length * (1 - range_ratio), length * (1 + range_ratio)].",
|
||||
"context in a request (default: 0). "
|
||||
"For random datasets (random, random-mm, random-rerank), either this "
|
||||
"argument or the corresponding --random-prefix-len argument can be used, "
|
||||
"but not both.",
|
||||
)
|
||||
|
||||
# hf dtaset
|
||||
@ -694,6 +778,9 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
||||
"repetition dataset.",
|
||||
)
|
||||
|
||||
# (random, random-mm, random-rerank)
|
||||
add_random_dataset_args(parser)
|
||||
|
||||
parser = AsyncEngineArgs.add_cli_args(parser)
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user