mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-23 17:51:21 +08:00
add random-mm, random-rerank
This commit is contained in:
parent
3b2596d5a2
commit
9b0d1aa277
@ -1436,6 +1436,75 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
|
|||||||
help="Maximum distance for blazedit dataset. Min: 0, Max: 1.0",
|
help="Maximum distance for blazedit dataset. Min: 0, Max: 1.0",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Add random dataset arguments (random-mm and random-rerank)
|
||||||
|
add_random_dataset_args(parser)
|
||||||
|
|
||||||
|
hf_group = parser.add_argument_group("hf dataset options")
|
||||||
|
hf_group.add_argument(
|
||||||
|
"--hf-subset", type=str, default=None, help="Subset of the HF dataset."
|
||||||
|
)
|
||||||
|
hf_group.add_argument(
|
||||||
|
"--hf-split", type=str, default=None, help="Split of the HF dataset."
|
||||||
|
)
|
||||||
|
hf_group.add_argument(
|
||||||
|
"--hf-name",
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
help=(
|
||||||
|
"Name of the dataset on HuggingFace "
|
||||||
|
"(e.g., 'lmarena-ai/VisionArena-Chat'). "
|
||||||
|
"Specify this if your dataset-path is a local path."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
hf_group.add_argument(
|
||||||
|
"--hf-output-len",
|
||||||
|
type=int,
|
||||||
|
default=None,
|
||||||
|
help="Output length for each request. Overrides the output lengths "
|
||||||
|
"from the sampled HF dataset.",
|
||||||
|
)
|
||||||
|
|
||||||
|
prefix_repetition_group = parser.add_argument_group(
|
||||||
|
"prefix repetition dataset options"
|
||||||
|
)
|
||||||
|
prefix_repetition_group.add_argument(
|
||||||
|
"--prefix-repetition-prefix-len",
|
||||||
|
type=int,
|
||||||
|
default=256,
|
||||||
|
help="Number of prefix tokens per request, used only for prefix "
|
||||||
|
"repetition dataset.",
|
||||||
|
)
|
||||||
|
prefix_repetition_group.add_argument(
|
||||||
|
"--prefix-repetition-suffix-len",
|
||||||
|
type=int,
|
||||||
|
default=256,
|
||||||
|
help="Number of suffix tokens per request, used only for prefix "
|
||||||
|
"repetition dataset. Total input length is prefix_len + suffix_len.",
|
||||||
|
)
|
||||||
|
prefix_repetition_group.add_argument(
|
||||||
|
"--prefix-repetition-num-prefixes",
|
||||||
|
type=int,
|
||||||
|
default=10,
|
||||||
|
help="Number of prefixes to generate, used only for prefix repetition "
|
||||||
|
"dataset. Prompts per prefix is num_requests // num_prefixes.",
|
||||||
|
)
|
||||||
|
prefix_repetition_group.add_argument(
|
||||||
|
"--prefix-repetition-output-len",
|
||||||
|
type=int,
|
||||||
|
default=128,
|
||||||
|
help="Number of output tokens per request, used only for prefix "
|
||||||
|
"repetition dataset.",
|
||||||
|
)
|
||||||
|
|
||||||
|
def add_random_dataset_args(parser: FlexibleArgumentParser) -> None:
|
||||||
|
"""Add CLI arguments for random multimodal and random reranking datasets.
|
||||||
|
|
||||||
|
This function adds arguments needed for:
|
||||||
|
- random-mm (random multimodal dataset)
|
||||||
|
- random-rerank (random dataset for reranking)
|
||||||
|
|
||||||
|
It can be called directly by benchmark scripts or by add_dataset_parser.
|
||||||
|
"""
|
||||||
random_group = parser.add_argument_group("random dataset options")
|
random_group = parser.add_argument_group("random dataset options")
|
||||||
random_group.add_argument(
|
random_group.add_argument(
|
||||||
"--random-input-len",
|
"--random-input-len",
|
||||||
@ -1580,63 +1649,6 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
hf_group = parser.add_argument_group("hf dataset options")
|
|
||||||
hf_group.add_argument(
|
|
||||||
"--hf-subset", type=str, default=None, help="Subset of the HF dataset."
|
|
||||||
)
|
|
||||||
hf_group.add_argument(
|
|
||||||
"--hf-split", type=str, default=None, help="Split of the HF dataset."
|
|
||||||
)
|
|
||||||
hf_group.add_argument(
|
|
||||||
"--hf-name",
|
|
||||||
type=str,
|
|
||||||
default=None,
|
|
||||||
help=(
|
|
||||||
"Name of the dataset on HuggingFace "
|
|
||||||
"(e.g., 'lmarena-ai/VisionArena-Chat'). "
|
|
||||||
"Specify this if your dataset-path is a local path."
|
|
||||||
),
|
|
||||||
)
|
|
||||||
hf_group.add_argument(
|
|
||||||
"--hf-output-len",
|
|
||||||
type=int,
|
|
||||||
default=None,
|
|
||||||
help="Output length for each request. Overrides the output lengths "
|
|
||||||
"from the sampled HF dataset.",
|
|
||||||
)
|
|
||||||
|
|
||||||
prefix_repetition_group = parser.add_argument_group(
|
|
||||||
"prefix repetition dataset options"
|
|
||||||
)
|
|
||||||
prefix_repetition_group.add_argument(
|
|
||||||
"--prefix-repetition-prefix-len",
|
|
||||||
type=int,
|
|
||||||
default=256,
|
|
||||||
help="Number of prefix tokens per request, used only for prefix "
|
|
||||||
"repetition dataset.",
|
|
||||||
)
|
|
||||||
prefix_repetition_group.add_argument(
|
|
||||||
"--prefix-repetition-suffix-len",
|
|
||||||
type=int,
|
|
||||||
default=256,
|
|
||||||
help="Number of suffix tokens per request, used only for prefix "
|
|
||||||
"repetition dataset. Total input length is prefix_len + suffix_len.",
|
|
||||||
)
|
|
||||||
prefix_repetition_group.add_argument(
|
|
||||||
"--prefix-repetition-num-prefixes",
|
|
||||||
type=int,
|
|
||||||
default=10,
|
|
||||||
help="Number of prefixes to generate, used only for prefix repetition "
|
|
||||||
"dataset. Prompts per prefix is num_requests // num_prefixes.",
|
|
||||||
)
|
|
||||||
prefix_repetition_group.add_argument(
|
|
||||||
"--prefix-repetition-output-len",
|
|
||||||
type=int,
|
|
||||||
default=128,
|
|
||||||
help="Number of output tokens per request, used only for prefix "
|
|
||||||
"repetition dataset.",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_samples(args, tokenizer: TokenizerLike) -> list[SampleRequest]:
|
def get_samples(args, tokenizer: TokenizerLike) -> list[SampleRequest]:
|
||||||
if not hasattr(args, "request_id_prefix"):
|
if not hasattr(args, "request_id_prefix"):
|
||||||
|
|||||||
@ -3,32 +3,33 @@
|
|||||||
r"""Benchmark multimodal processor latency.
|
r"""Benchmark multimodal processor latency.
|
||||||
|
|
||||||
This benchmark measures the latency of the mm processor module
|
This benchmark measures the latency of the mm processor module
|
||||||
using randomly generated multimodal prompts with synthetic images.
|
using multimodal prompts from datasets.
|
||||||
MM processor stats are automatically enabled.
|
MM processor stats are automatically enabled.
|
||||||
|
|
||||||
Run:
|
Run:
|
||||||
vllm bench mm-processor \
|
vllm bench mm-processor \
|
||||||
--model <your_model> \
|
--model <your_model> \
|
||||||
|
--dataset-name random-mm \
|
||||||
--num-prompts 10 \
|
--num-prompts 10
|
||||||
--input-len 1024 \
|
|
||||||
--output-len 128 \
|
|
||||||
--num-images 1
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
|
import warnings
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from vllm.benchmarks.throughput import get_requests
|
||||||
from vllm.engine.arg_utils import EngineArgs
|
from vllm.engine.arg_utils import EngineArgs
|
||||||
from vllm.multimodal.processing import (
|
from vllm.multimodal.processing import (
|
||||||
get_timing_stats_from_engine_client,
|
get_timing_stats_from_engine_client,
|
||||||
)
|
)
|
||||||
|
from vllm.tokenizers import get_tokenizer
|
||||||
from vllm.utils.gc_utils import freeze_gc_heap
|
from vllm.utils.gc_utils import freeze_gc_heap
|
||||||
from vllm.utils.import_utils import PlaceholderModule
|
from vllm.utils.import_utils import PlaceholderModule
|
||||||
|
|
||||||
@ -37,22 +38,6 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
pd = PlaceholderModule("pandas")
|
pd = PlaceholderModule("pandas")
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class MultimodalProcessorBenchmarkMetrics:
|
|
||||||
"""Metrics for multimodal processor benchmark."""
|
|
||||||
|
|
||||||
completed: int
|
|
||||||
failed: int
|
|
||||||
mean_e2el_ms: float
|
|
||||||
median_e2el_ms: float
|
|
||||||
std_e2el_ms: float
|
|
||||||
percentiles_e2el_ms: list[tuple[float, float]]
|
|
||||||
|
|
||||||
"""Per-stage timing stats: mean, median, std, percentiles for each stage."""
|
|
||||||
mm_processor_stats: dict[str, dict[str, float]]
|
|
||||||
|
|
||||||
|
|
||||||
def collect_mm_processor_stats(
|
def collect_mm_processor_stats(
|
||||||
llm_engine: Any,
|
llm_engine: Any,
|
||||||
) -> dict[str, list[float]]:
|
) -> dict[str, list[float]]:
|
||||||
@ -118,54 +103,56 @@ def calculate_mm_processor_metrics(
|
|||||||
return metrics
|
return metrics
|
||||||
|
|
||||||
|
|
||||||
def generate_random_multimodal_prompts(
|
def validate_args(args):
|
||||||
num_prompts: int,
|
|
||||||
input_len: int,
|
|
||||||
output_len: int,
|
|
||||||
tokenizer: Any,
|
|
||||||
num_images: int = 1,
|
|
||||||
image_width: int = 256,
|
|
||||||
image_height: int = 256,
|
|
||||||
seed: int = 0,
|
|
||||||
) -> tuple[list[list[dict]], list[int]]:
|
|
||||||
"""
|
"""
|
||||||
Generate random multimodal prompts with synthetic images and text tokens.
|
Validate command-line arguments for mm_processor benchmark.
|
||||||
|
|
||||||
Returns:
|
|
||||||
tuple: (prompts, expected_output_lens)
|
|
||||||
- prompts: List of OpenAI chat format messages with text and images
|
|
||||||
- expected_output_lens: List of expected output lengths
|
|
||||||
"""
|
"""
|
||||||
from PIL import Image
|
if not getattr(args, "tokenizer", None):
|
||||||
|
args.tokenizer = args.model
|
||||||
|
|
||||||
from vllm.benchmarks.datasets import process_image
|
if getattr(args, "dataset", None) is not None:
|
||||||
|
warnings.warn(
|
||||||
|
"The '--dataset' argument will be deprecated in the next release. "
|
||||||
|
"Please use '--dataset-name' and '--dataset-path' instead.",
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
args.dataset_path = args.dataset
|
||||||
|
|
||||||
rng = np.random.default_rng(seed)
|
if not hasattr(args, "dataset_path"):
|
||||||
|
args.dataset_path = None
|
||||||
|
if not hasattr(args, "data_parallel_size"):
|
||||||
|
args.data_parallel_size = 1
|
||||||
|
if not hasattr(args, "lora_path"):
|
||||||
|
args.lora_path = None
|
||||||
|
if not hasattr(args, "max_loras"):
|
||||||
|
args.max_loras = None
|
||||||
|
|
||||||
prompts = []
|
# === Random Dataset Argument Conflict Detection ===
|
||||||
expected_output_lens = []
|
# Check for conflicts between regular and random arguments when using random datasets
|
||||||
|
dataset_name = getattr(args, "dataset_name", None)
|
||||||
|
if dataset_name in {"random", "random-mm", "random-rerank"}:
|
||||||
|
random_input_len = getattr(args, "random_input_len", None)
|
||||||
|
random_output_len = getattr(args, "random_output_len", None)
|
||||||
|
random_prefix_len = getattr(args, "random_prefix_len", None)
|
||||||
|
input_len = getattr(args, "input_len", None)
|
||||||
|
output_len = getattr(args, "output_len", None)
|
||||||
|
prefix_len = getattr(args, "prefix_len", None)
|
||||||
|
|
||||||
for i in range(num_prompts):
|
if input_len is not None and random_input_len is not None:
|
||||||
vocab_size = tokenizer.vocab_size
|
raise ValueError(
|
||||||
prompt_token_ids = rng.integers(0, vocab_size, size=input_len).tolist()
|
"Cannot specify both --input-len and --random-input-len. "
|
||||||
|
"For random datasets, use only one of them (prefer --random-input-len)."
|
||||||
text_prompt = tokenizer.decode(prompt_token_ids)
|
)
|
||||||
|
if output_len is not None and random_output_len is not None:
|
||||||
mm_items = []
|
raise ValueError(
|
||||||
for _ in range(num_images):
|
"Cannot specify both --output-len and --random-output-len. "
|
||||||
random_pixels = rng.integers(
|
"For random datasets, use only one of them (prefer --random-output-len)."
|
||||||
0, 256, (image_height, image_width, 3), dtype=np.uint8
|
)
|
||||||
|
if prefix_len is not None and random_prefix_len is not None:
|
||||||
|
raise ValueError(
|
||||||
|
"Cannot specify both --prefix-len and --random-prefix-len. "
|
||||||
|
"For random datasets, use only one of them (prefer --random-prefix-len)."
|
||||||
)
|
)
|
||||||
image = Image.fromarray(random_pixels)
|
|
||||||
mm_item = process_image(image)
|
|
||||||
mm_items.append(mm_item)
|
|
||||||
|
|
||||||
content = [{"type": "text", "text": text_prompt}]
|
|
||||||
content.extend(mm_items)
|
|
||||||
prompts.append([{"role": "user", "content": content}])
|
|
||||||
expected_output_lens.append(output_len)
|
|
||||||
|
|
||||||
return prompts, expected_output_lens
|
|
||||||
|
|
||||||
|
|
||||||
def benchmark_multimodal_processor(
|
def benchmark_multimodal_processor(
|
||||||
@ -176,28 +163,33 @@ def benchmark_multimodal_processor(
|
|||||||
"""
|
"""
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
|
|
||||||
|
validate_args(args)
|
||||||
|
|
||||||
|
if args.seed is None:
|
||||||
|
args.seed = 0
|
||||||
|
|
||||||
|
tokenizer = get_tokenizer(
|
||||||
|
args.tokenizer,
|
||||||
|
tokenizer_mode=getattr(args, "tokenizer_mode", "auto"),
|
||||||
|
trust_remote_code=getattr(args, "trust_remote_code", False),
|
||||||
|
)
|
||||||
|
|
||||||
|
requests = get_requests(args, tokenizer)
|
||||||
|
|
||||||
engine_args = EngineArgs.from_cli_args(args)
|
engine_args = EngineArgs.from_cli_args(args)
|
||||||
llm = LLM(**dataclasses.asdict(engine_args))
|
llm = LLM(**dataclasses.asdict(engine_args))
|
||||||
|
|
||||||
assert llm.llm_engine.model_config.max_model_len >= (
|
assert all(
|
||||||
args.input_len + args.output_len
|
llm.llm_engine.model_config.max_model_len
|
||||||
|
>= (request.prompt_len + request.expected_output_len)
|
||||||
|
for request in requests
|
||||||
), (
|
), (
|
||||||
"Please ensure that max_model_len is greater than "
|
"Please ensure that max_model_len is greater than the sum of "
|
||||||
"the sum of input_len and output_len."
|
"prompt_len and expected_output_len for all requests."
|
||||||
)
|
)
|
||||||
|
|
||||||
seed = getattr(args, "seed", 0)
|
prompts = [request.prompt for request in requests]
|
||||||
tokenizer = llm.get_tokenizer()
|
expected_output_lens = [request.expected_output_len for request in requests]
|
||||||
prompts, expected_output_lens = generate_random_multimodal_prompts(
|
|
||||||
num_prompts=args.num_prompts,
|
|
||||||
input_len=args.input_len,
|
|
||||||
output_len=args.output_len,
|
|
||||||
tokenizer=tokenizer,
|
|
||||||
num_images=args.num_images,
|
|
||||||
image_width=args.image_width,
|
|
||||||
image_height=args.image_height,
|
|
||||||
seed=seed,
|
|
||||||
)
|
|
||||||
|
|
||||||
sampling_params = [
|
sampling_params = [
|
||||||
SamplingParams(
|
SamplingParams(
|
||||||
@ -297,42 +289,32 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None:
|
|||||||
|
|
||||||
parser.set_defaults(enable_mm_processor_stats=True)
|
parser.set_defaults(enable_mm_processor_stats=True)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--dataset-name",
|
||||||
|
type=str,
|
||||||
|
default="random-mm",
|
||||||
|
help="Name of the dataset to benchmark on. Defaults to 'random-mm'.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--prefix-len",
|
||||||
|
type=int,
|
||||||
|
default=0,
|
||||||
|
help="Number of fixed prefix tokens before the random context in a request. "
|
||||||
|
"For random datasets (random, random-mm, random-rerank), either this "
|
||||||
|
"argument or the corresponding --random-prefix-len argument can be used, "
|
||||||
|
"but not both.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--num-prompts",
|
"--num-prompts",
|
||||||
type=int,
|
type=int,
|
||||||
default=10,
|
default=10,
|
||||||
help="Number of prompts to process.",
|
help="Number of prompts to process.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--input-len",
|
from vllm.benchmarks.datasets import add_random_dataset_args
|
||||||
type=int,
|
|
||||||
default=1024,
|
# Register the shared random dataset arguments (random, random-mm, random-rerank)
|
||||||
help="Number of input tokens per request.",
|
add_random_dataset_args(parser)
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--output-len",
|
|
||||||
type=int,
|
|
||||||
default=128,
|
|
||||||
help="Number of output tokens per request.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--num-images",
|
|
||||||
type=int,
|
|
||||||
default=1,
|
|
||||||
help="Number of images per prompt.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--image-width",
|
|
||||||
type=int,
|
|
||||||
default=256,
|
|
||||||
help="Width of generated images in pixels.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--image-height",
|
|
||||||
type=int,
|
|
||||||
default=256,
|
|
||||||
help="Height of generated images in pixels.",
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output-json",
|
"--output-json",
|
||||||
@ -414,11 +396,8 @@ def main(args: argparse.Namespace) -> None:
|
|||||||
result["config"] = {
|
result["config"] = {
|
||||||
"model": args.model,
|
"model": args.model,
|
||||||
"num_prompts": args.num_prompts,
|
"num_prompts": args.num_prompts,
|
||||||
"input_len": args.input_len,
|
"input_len": getattr(args, "random_input_len", None),
|
||||||
"output_len": args.output_len,
|
"output_len": getattr(args, "random_output_len", None),
|
||||||
"num_images": args.num_images,
|
|
||||||
"image_width": args.image_width,
|
|
||||||
"image_height": args.image_height,
|
|
||||||
}
|
}
|
||||||
result["timestamp"] = datetime.now().isoformat()
|
result["timestamp"] = datetime.now().isoformat()
|
||||||
|
|
||||||
|
|||||||
@ -24,10 +24,13 @@ from vllm.benchmarks.datasets import (
|
|||||||
MultiModalConversationDataset,
|
MultiModalConversationDataset,
|
||||||
PrefixRepetitionRandomDataset,
|
PrefixRepetitionRandomDataset,
|
||||||
RandomDataset,
|
RandomDataset,
|
||||||
|
RandomDatasetForReranking,
|
||||||
|
RandomMultiModalDataset,
|
||||||
SampleRequest,
|
SampleRequest,
|
||||||
ShareGPTDataset,
|
ShareGPTDataset,
|
||||||
SonnetDataset,
|
SonnetDataset,
|
||||||
VisionArenaDataset,
|
VisionArenaDataset,
|
||||||
|
add_random_dataset_args,
|
||||||
)
|
)
|
||||||
from vllm.benchmarks.lib.utils import convert_to_pytorch_benchmark_format, write_to_json
|
from vllm.benchmarks.lib.utils import convert_to_pytorch_benchmark_format, write_to_json
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
||||||
@ -351,7 +354,13 @@ def get_requests(args, tokenizer):
|
|||||||
and args.dataset_name not in {"prefix_repetition", "random-mm", "random-rerank"}
|
and args.dataset_name not in {"prefix_repetition", "random-mm", "random-rerank"}
|
||||||
):
|
):
|
||||||
sample_kwargs["range_ratio"] = args.random_range_ratio
|
sample_kwargs["range_ratio"] = args.random_range_ratio
|
||||||
sample_kwargs["prefix_len"] = args.prefix_len
|
# prefer random_* arguments, fall back to regular arguments
|
||||||
|
random_prefix_len = getattr(args, "random_prefix_len", None)
|
||||||
|
sample_kwargs["prefix_len"] = random_prefix_len if random_prefix_len is not None else args.prefix_len
|
||||||
|
random_input_len = getattr(args, "random_input_len", None)
|
||||||
|
sample_kwargs["input_len"] = random_input_len if random_input_len is not None else args.input_len
|
||||||
|
random_output_len = getattr(args, "random_output_len", None)
|
||||||
|
sample_kwargs["output_len"] = random_output_len if random_output_len is not None else args.output_len
|
||||||
dataset_cls = RandomDataset
|
dataset_cls = RandomDataset
|
||||||
elif args.dataset_name == "sharegpt":
|
elif args.dataset_name == "sharegpt":
|
||||||
dataset_cls = ShareGPTDataset
|
dataset_cls = ShareGPTDataset
|
||||||
@ -395,6 +404,39 @@ def get_requests(args, tokenizer):
|
|||||||
sample_kwargs["suffix_len"] = args.prefix_repetition_suffix_len
|
sample_kwargs["suffix_len"] = args.prefix_repetition_suffix_len
|
||||||
sample_kwargs["num_prefixes"] = args.prefix_repetition_num_prefixes
|
sample_kwargs["num_prefixes"] = args.prefix_repetition_num_prefixes
|
||||||
sample_kwargs["output_len"] = args.prefix_repetition_output_len
|
sample_kwargs["output_len"] = args.prefix_repetition_output_len
|
||||||
|
elif args.dataset_name == "random-mm":
|
||||||
|
dataset_cls = RandomMultiModalDataset
|
||||||
|
# prefer random_* arguments, fall back to regular arguments
|
||||||
|
random_input_len = getattr(args, "random_input_len", None)
|
||||||
|
sample_kwargs["input_len"] = random_input_len if random_input_len is not None else getattr(args, "input_len", None)
|
||||||
|
random_output_len = getattr(args, "random_output_len", None)
|
||||||
|
sample_kwargs["output_len"] = random_output_len if random_output_len is not None else getattr(args, "output_len", None)
|
||||||
|
sample_kwargs["base_items_per_request"] = getattr(
|
||||||
|
args, "random_mm_base_items_per_request", None
|
||||||
|
)
|
||||||
|
sample_kwargs["num_mm_items_range_ratio"] = getattr(
|
||||||
|
args, "random_mm_num_mm_items_range_ratio", None
|
||||||
|
)
|
||||||
|
sample_kwargs["limit_mm_per_prompt"] = getattr(
|
||||||
|
args, "random_mm_limit_mm_per_prompt", None
|
||||||
|
)
|
||||||
|
sample_kwargs["bucket_config"] = getattr(
|
||||||
|
args, "random_mm_bucket_config", None
|
||||||
|
)
|
||||||
|
sample_kwargs["enable_multimodal_chat"] = True
|
||||||
|
random_prefix_len = getattr(args, "random_prefix_len", None)
|
||||||
|
sample_kwargs["prefix_len"] = random_prefix_len if random_prefix_len is not None else args.prefix_len
|
||||||
|
sample_kwargs["range_ratio"] = args.random_range_ratio
|
||||||
|
elif args.dataset_name == "random-rerank":
|
||||||
|
dataset_cls = RandomDatasetForReranking
|
||||||
|
# prefer random_* arguments, fall back to regular arguments
|
||||||
|
random_input_len = getattr(args, "random_input_len", None)
|
||||||
|
sample_kwargs["input_len"] = random_input_len if random_input_len is not None else getattr(args, "input_len", None)
|
||||||
|
random_output_len = getattr(args, "random_output_len", None)
|
||||||
|
sample_kwargs["output_len"] = random_output_len if random_output_len is not None else getattr(args, "output_len", None)
|
||||||
|
sample_kwargs["batchsize"] = getattr(args, "random_batch_size", 1)
|
||||||
|
sample_kwargs["is_reranker"] = not getattr(args, "no_reranker", False)
|
||||||
|
sample_kwargs["range_ratio"] = args.random_range_ratio
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown dataset name: {args.dataset_name}")
|
raise ValueError(f"Unknown dataset name: {args.dataset_name}")
|
||||||
# Remove None values
|
# Remove None values
|
||||||
@ -451,8 +493,11 @@ def validate_args(args):
|
|||||||
):
|
):
|
||||||
print("When dataset path is not set, it will default to random dataset")
|
print("When dataset path is not set, it will default to random dataset")
|
||||||
args.dataset_name = "random"
|
args.dataset_name = "random"
|
||||||
if args.input_len is None:
|
random_input_len = getattr(args, "random_input_len", None)
|
||||||
raise ValueError("input_len must be provided for a random dataset")
|
if args.input_len is None and random_input_len is None:
|
||||||
|
raise ValueError(
|
||||||
|
"Either --input-len or --random-input-len must be provided for a random dataset"
|
||||||
|
)
|
||||||
|
|
||||||
# === Dataset Name Specific Checks ===
|
# === Dataset Name Specific Checks ===
|
||||||
# --hf-subset and --hf-split: only used
|
# --hf-subset and --hf-split: only used
|
||||||
@ -485,26 +530,66 @@ def validate_args(args):
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"{args.dataset_path} is not supported by hf dataset.")
|
raise ValueError(f"{args.dataset_path} is not supported by hf dataset.")
|
||||||
|
|
||||||
# --random-range-ratio: only used when dataset_name is 'random'
|
# --random-range-ratio: only used when dataset_name is 'random', 'random-mm', or 'random-rerank'
|
||||||
if args.dataset_name != "random" and args.random_range_ratio is not None:
|
if args.dataset_name not in {"random", "random-mm", "random-rerank"} and args.random_range_ratio is not None:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"--random-range-ratio will be ignored since \
|
"--random-range-ratio will be ignored since \
|
||||||
--dataset-name is not 'random'.",
|
--dataset-name is not 'random', 'random-mm', or 'random-rerank'.",
|
||||||
stacklevel=2,
|
stacklevel=2,
|
||||||
)
|
)
|
||||||
|
|
||||||
# --prefix-len: only used when dataset_name is 'random', 'sonnet', or not
|
# --random-batch-size: only used when dataset_name is 'random-rerank'
|
||||||
|
if args.dataset_name != "random-rerank" and getattr(args, "random_batch_size", None) is not None:
|
||||||
|
if args.random_batch_size != 1:
|
||||||
|
warnings.warn(
|
||||||
|
"--random-batch-size will be ignored since \
|
||||||
|
--dataset-name is not 'random-rerank'.",
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
# --no-reranker: only used when dataset_name is 'random-rerank'
|
||||||
|
if args.dataset_name != "random-rerank" and getattr(args, "no_reranker", False):
|
||||||
|
warnings.warn(
|
||||||
|
"--no-reranker will be ignored since \
|
||||||
|
--dataset-name is not 'random-rerank'.",
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
# --prefix-len: only used when dataset_name is 'random', 'random-mm', 'sonnet', or not
|
||||||
# set.
|
# set.
|
||||||
if (
|
if (
|
||||||
args.dataset_name not in {"random", "sonnet", None}
|
args.dataset_name not in {"random", "random-mm", "sonnet", None}
|
||||||
and args.prefix_len is not None
|
and args.prefix_len is not None
|
||||||
):
|
):
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"--prefix-len will be ignored since --dataset-name\
|
"--prefix-len will be ignored since --dataset-name\
|
||||||
is not 'random', 'sonnet', or not set.",
|
is not 'random', 'random-mm', 'sonnet', or not set.",
|
||||||
stacklevel=2,
|
stacklevel=2,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# === Random Dataset Argument Conflict Detection ===
|
||||||
|
# Check for conflicts between regular and random arguments when using random datasets
|
||||||
|
if args.dataset_name in {"random", "random-mm", "random-rerank"}:
|
||||||
|
random_input_len = getattr(args, "random_input_len", None)
|
||||||
|
random_output_len = getattr(args, "random_output_len", None)
|
||||||
|
random_prefix_len = getattr(args, "random_prefix_len", None)
|
||||||
|
|
||||||
|
if args.input_len is not None and random_input_len is not None:
|
||||||
|
raise ValueError(
|
||||||
|
"Cannot specify both --input-len and --random-input-len. "
|
||||||
|
"For random datasets, use only one of them (prefer --random-input-len)."
|
||||||
|
)
|
||||||
|
if args.output_len is not None and random_output_len is not None:
|
||||||
|
raise ValueError(
|
||||||
|
"Cannot specify both --output-len and --random-output-len. "
|
||||||
|
"For random datasets, use only one of them (prefer --random-output-len)."
|
||||||
|
)
|
||||||
|
if args.prefix_len is not None and random_prefix_len is not None:
|
||||||
|
raise ValueError(
|
||||||
|
"Cannot specify both --prefix-len and --random-prefix-len. "
|
||||||
|
"For random datasets, use only one of them (prefer --random-prefix-len)."
|
||||||
|
)
|
||||||
|
|
||||||
# === LoRA Settings ===
|
# === LoRA Settings ===
|
||||||
if getattr(args, "enable_lora", False) and args.backend != "vllm":
|
if getattr(args, "enable_lora", False) and args.backend != "vllm":
|
||||||
raise ValueError("LoRA benchmarking is only supported for vLLM backend")
|
raise ValueError("LoRA benchmarking is only supported for vLLM backend")
|
||||||
@ -554,7 +639,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--dataset-name",
|
"--dataset-name",
|
||||||
type=str,
|
type=str,
|
||||||
choices=["sharegpt", "random", "sonnet", "burstgpt", "hf", "prefix_repetition"],
|
choices=["sharegpt", "random", "sonnet", "burstgpt", "hf", "prefix_repetition", "random-mm", "random-rerank"],
|
||||||
help="Name of the dataset to benchmark on.",
|
help="Name of the dataset to benchmark on.",
|
||||||
default="sharegpt",
|
default="sharegpt",
|
||||||
)
|
)
|
||||||
@ -574,14 +659,20 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
|||||||
"--input-len",
|
"--input-len",
|
||||||
type=int,
|
type=int,
|
||||||
default=None,
|
default=None,
|
||||||
help="Input prompt length for each request",
|
help="Input prompt length for each request. "
|
||||||
|
"For random datasets (random, random-mm, random-rerank), either this "
|
||||||
|
"argument or the corresponding --random-input-len argument can be used, "
|
||||||
|
"but not both.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output-len",
|
"--output-len",
|
||||||
type=int,
|
type=int,
|
||||||
default=None,
|
default=None,
|
||||||
help="Output length for each request. Overrides the "
|
help="Output length for each request. Overrides the "
|
||||||
"output length from the dataset.",
|
"output length from the dataset. "
|
||||||
|
"For random datasets (random, random-mm, random-rerank), either this "
|
||||||
|
"argument or the corresponding --random-output-len argument can be used, "
|
||||||
|
"but not both.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--n", type=int, default=1, help="Number of generated sequences per prompt."
|
"--n", type=int, default=1, help="Number of generated sequences per prompt."
|
||||||
@ -634,17 +725,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
|||||||
type=int,
|
type=int,
|
||||||
default=0,
|
default=0,
|
||||||
help="Number of fixed prefix tokens before the random "
|
help="Number of fixed prefix tokens before the random "
|
||||||
"context in a request (default: 0).",
|
"context in a request (default: 0). "
|
||||||
)
|
"For random datasets (random, random-mm, random-rerank), either this "
|
||||||
# random dataset
|
"argument or the corresponding --random-prefix-len argument can be used, "
|
||||||
parser.add_argument(
|
"but not both.",
|
||||||
"--random-range-ratio",
|
|
||||||
type=float,
|
|
||||||
default=0.0,
|
|
||||||
help="Range ratio for sampling input/output length, "
|
|
||||||
"used only for RandomDataset. Must be in the range [0, 1) to define "
|
|
||||||
"a symmetric sampling range "
|
|
||||||
"[length * (1 - range_ratio), length * (1 + range_ratio)].",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# hf dataset
|
# hf dataset
|
||||||
@ -694,6 +778,9 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
|||||||
"repetition dataset.",
|
"repetition dataset.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Register the shared random dataset arguments (random, random-mm, random-rerank)
|
||||||
|
add_random_dataset_args(parser)
|
||||||
|
|
||||||
parser = AsyncEngineArgs.add_cli_args(parser)
|
parser = AsyncEngineArgs.add_cli_args(parser)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user