add random-mm, random-rerank

Reagan 2025-12-24 16:23:24 -08:00
parent 3b2596d5a2
commit 9b0d1aa277
3 changed files with 270 additions and 192 deletions

View File

@@ -1436,6 +1436,75 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
help="Maximum distance for blazedit dataset. Min: 0, Max: 1.0",
)
# Add random dataset arguments (random-mm and random-rerank)
add_random_dataset_args(parser)
hf_group = parser.add_argument_group("hf dataset options")
hf_group.add_argument(
"--hf-subset", type=str, default=None, help="Subset of the HF dataset."
)
hf_group.add_argument(
"--hf-split", type=str, default=None, help="Split of the HF dataset."
)
hf_group.add_argument(
"--hf-name",
type=str,
default=None,
help=(
"Name of the dataset on HuggingFace "
"(e.g., 'lmarena-ai/VisionArena-Chat'). "
"Specify this if your dataset-path is a local path."
),
)
hf_group.add_argument(
"--hf-output-len",
type=int,
default=None,
help="Output length for each request. Overrides the output lengths "
"from the sampled HF dataset.",
)
prefix_repetition_group = parser.add_argument_group(
"prefix repetition dataset options"
)
prefix_repetition_group.add_argument(
"--prefix-repetition-prefix-len",
type=int,
default=256,
help="Number of prefix tokens per request, used only for prefix "
"repetition dataset.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-suffix-len",
type=int,
default=256,
help="Number of suffix tokens per request, used only for prefix "
"repetition dataset. Total input length is prefix_len + suffix_len.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-num-prefixes",
type=int,
default=10,
help="Number of prefixes to generate, used only for prefix repetition "
"dataset. Prompts per prefix is num_requests // num_prefixes.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-output-len",
type=int,
default=128,
help="Number of output tokens per request, used only for prefix "
"repetition dataset.",
)
def add_random_dataset_args(parser: FlexibleArgumentParser) -> None:
"""Add CLI arguments for random multimodal and random reranking datasets.
This function adds arguments needed for:
- random-mm (random multimodal dataset)
- random-rerank (random dataset for reranking)
It can be called directly by benchmark scripts or by add_dataset_parser.
"""
random_group = parser.add_argument_group("random dataset options")
random_group.add_argument(
"--random-input-len",
@@ -1580,63 +1649,6 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
),
)
hf_group = parser.add_argument_group("hf dataset options")
hf_group.add_argument(
"--hf-subset", type=str, default=None, help="Subset of the HF dataset."
)
hf_group.add_argument(
"--hf-split", type=str, default=None, help="Split of the HF dataset."
)
hf_group.add_argument(
"--hf-name",
type=str,
default=None,
help=(
"Name of the dataset on HuggingFace "
"(e.g., 'lmarena-ai/VisionArena-Chat'). "
"Specify this if your dataset-path is a local path."
),
)
hf_group.add_argument(
"--hf-output-len",
type=int,
default=None,
help="Output length for each request. Overrides the output lengths "
"from the sampled HF dataset.",
)
prefix_repetition_group = parser.add_argument_group(
"prefix repetition dataset options"
)
prefix_repetition_group.add_argument(
"--prefix-repetition-prefix-len",
type=int,
default=256,
help="Number of prefix tokens per request, used only for prefix "
"repetition dataset.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-suffix-len",
type=int,
default=256,
help="Number of suffix tokens per request, used only for prefix "
"repetition dataset. Total input length is prefix_len + suffix_len.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-num-prefixes",
type=int,
default=10,
help="Number of prefixes to generate, used only for prefix repetition "
"dataset. Prompts per prefix is num_requests // num_prefixes.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-output-len",
type=int,
default=128,
help="Number of output tokens per request, used only for prefix "
"repetition dataset.",
)
def get_samples(args, tokenizer: TokenizerLike) -> list[SampleRequest]:
if not hasattr(args, "request_id_prefix"):
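
The add_random_dataset_args docstring above notes that the helper can be called directly by benchmark scripts as well as by add_dataset_parser. Below is a minimal sketch of that direct usage, assuming the --random-input-len / --random-output-len flags shown elsewhere in this diff; the script, model name, and values are illustrative only, and a stock argparse parser is used since the helper only needs add_argument_group():

import argparse

from vllm.benchmarks.datasets import add_random_dataset_args

# vLLM's own scripts pass a FlexibleArgumentParser; a plain argparse parser
# works for this sketch because the helper only adds argument groups/flags.
parser = argparse.ArgumentParser(description="toy benchmark using random datasets")
parser.add_argument("--model", type=str, required=True)
add_random_dataset_args(parser)  # adds --random-input-len, --random-output-len, ...

args = parser.parse_args(
    [
        "--model", "facebook/opt-125m",
        "--random-input-len", "1024",
        "--random-output-len", "128",
    ]
)
print(args.random_input_len, args.random_output_len)  # -> 1024 128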

View File

@@ -3,32 +3,33 @@
r"""Benchmark multimodal processor latency.
This benchmark measures the latency of the mm processor module
using randomly generated multimodal prompts with synthetic images.
using multimodal prompts from datasets.
MM processor stats are automatically enabled.
Run:
vllm bench mm-processor \
--model <your_model> \
--dataset-name random-mm \
--num-prompts 10 \
--random-input-len 1024 \
--random-output-len 128
"""
import argparse
import dataclasses
import json
import time
import warnings
from dataclasses import dataclass
from datetime import datetime
from typing import Any
import numpy as np
from vllm.benchmarks.throughput import get_requests
from vllm.engine.arg_utils import EngineArgs
from vllm.multimodal.processing import (
get_timing_stats_from_engine_client,
)
from vllm.tokenizers import get_tokenizer
from vllm.utils.gc_utils import freeze_gc_heap
from vllm.utils.import_utils import PlaceholderModule
@@ -37,22 +38,6 @@ try:
except ImportError:
pd = PlaceholderModule("pandas")
@dataclass
class MultimodalProcessorBenchmarkMetrics:
"""Metrics for multimodal processor benchmark."""
completed: int
failed: int
mean_e2el_ms: float
median_e2el_ms: float
std_e2el_ms: float
percentiles_e2el_ms: list[tuple[float, float]]
"""Per-stage timing stats: mean, median, std, percentiles for each stage."""
mm_processor_stats: dict[str, dict[str, float]]
def collect_mm_processor_stats(
llm_engine: Any,
) -> dict[str, list[float]]:
@@ -118,54 +103,56 @@ def calculate_mm_processor_metrics(
return metrics
def generate_random_multimodal_prompts(
num_prompts: int,
input_len: int,
output_len: int,
tokenizer: Any,
num_images: int = 1,
image_width: int = 256,
image_height: int = 256,
seed: int = 0,
) -> tuple[list[list[dict]], list[int]]:
def validate_args(args):
"""
Generate random multimodal prompts with synthetic images and text tokens.
Returns:
tuple: (prompts, expected_output_lens)
- prompts: List of OpenAI chat format messages with text and images
- expected_output_lens: List of expected output lengths
Validate command-line arguments for mm_processor benchmark.
"""
from PIL import Image
if not getattr(args, "tokenizer", None):
args.tokenizer = args.model
from vllm.benchmarks.datasets import process_image
if getattr(args, "dataset", None) is not None:
warnings.warn(
"The '--dataset' argument will be deprecated in the next release. "
"Please use '--dataset-name' and '--dataset-path' instead.",
stacklevel=2,
)
args.dataset_path = args.dataset
rng = np.random.default_rng(seed)
if not hasattr(args, "dataset_path"):
args.dataset_path = None
if not hasattr(args, "data_parallel_size"):
args.data_parallel_size = 1
if not hasattr(args, "lora_path"):
args.lora_path = None
if not hasattr(args, "max_loras"):
args.max_loras = None
prompts = []
expected_output_lens = []
# === Random Dataset Argument Conflict Detection ===
# Check for conflicts between regular and random arguments when using random datasets
dataset_name = getattr(args, "dataset_name", None)
if dataset_name in {"random", "random-mm", "random-rerank"}:
random_input_len = getattr(args, "random_input_len", None)
random_output_len = getattr(args, "random_output_len", None)
random_prefix_len = getattr(args, "random_prefix_len", None)
input_len = getattr(args, "input_len", None)
output_len = getattr(args, "output_len", None)
prefix_len = getattr(args, "prefix_len", None)
for i in range(num_prompts):
vocab_size = tokenizer.vocab_size
prompt_token_ids = rng.integers(0, vocab_size, size=input_len).tolist()
text_prompt = tokenizer.decode(prompt_token_ids)
mm_items = []
for _ in range(num_images):
random_pixels = rng.integers(
0, 256, (image_height, image_width, 3), dtype=np.uint8
if input_len is not None and random_input_len is not None:
raise ValueError(
"Cannot specify both --input-len and --random-input-len. "
"For random datasets, use only one of them (prefer --random-input-len)."
)
if output_len is not None and random_output_len is not None:
raise ValueError(
"Cannot specify both --output-len and --random-output-len. "
"For random datasets, use only one of them (prefer --random-output-len)."
)
if prefix_len is not None and random_prefix_len is not None:
raise ValueError(
"Cannot specify both --prefix-len and --random-prefix-len. "
"For random datasets, use only one of them (prefer --random-prefix-len)."
)
image = Image.fromarray(random_pixels)
mm_item = process_image(image)
mm_items.append(mm_item)
content = [{"type": "text", "text": text_prompt}]
content.extend(mm_items)
prompts.append([{"role": "user", "content": content}])
expected_output_lens.append(output_len)
return prompts, expected_output_lens
def benchmark_multimodal_processor(
@@ -176,28 +163,33 @@ def benchmark_multimodal_processor(
"""
from vllm import LLM, SamplingParams
validate_args(args)
if args.seed is None:
args.seed = 0
tokenizer = get_tokenizer(
args.tokenizer,
tokenizer_mode=getattr(args, "tokenizer_mode", "auto"),
trust_remote_code=getattr(args, "trust_remote_code", False),
)
requests = get_requests(args, tokenizer)
engine_args = EngineArgs.from_cli_args(args)
llm = LLM(**dataclasses.asdict(engine_args))
assert llm.llm_engine.model_config.max_model_len >= (
args.input_len + args.output_len
assert all(
llm.llm_engine.model_config.max_model_len
>= (request.prompt_len + request.expected_output_len)
for request in requests
), (
"Please ensure that max_model_len is greater than "
"the sum of input_len and output_len."
"Please ensure that max_model_len is greater than the sum of "
"prompt_len and expected_output_len for all requests."
)
seed = getattr(args, "seed", 0)
tokenizer = llm.get_tokenizer()
prompts, expected_output_lens = generate_random_multimodal_prompts(
num_prompts=args.num_prompts,
input_len=args.input_len,
output_len=args.output_len,
tokenizer=tokenizer,
num_images=args.num_images,
image_width=args.image_width,
image_height=args.image_height,
seed=seed,
)
prompts = [request.prompt for request in requests]
expected_output_lens = [request.expected_output_len for request in requests]
sampling_params = [
SamplingParams(
@@ -297,42 +289,32 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None:
parser.set_defaults(enable_mm_processor_stats=True)
parser.add_argument(
"--dataset-name",
type=str,
default="random-mm",
help="Name of the dataset to benchmark on. Defaults to 'random-mm'.",
)
parser.add_argument(
"--prefix-len",
type=int,
default=0,
help="Number of fixed prefix tokens before the random context in a request. "
"For random datasets (random, random-mm, random-rerank), either this "
"argument or the corresponding --random-prefix-len argument can be used, "
"but not both.",
)
parser.add_argument(
"--num-prompts",
type=int,
default=10,
help="Number of prompts to process.",
)
parser.add_argument(
"--input-len",
type=int,
default=1024,
help="Number of input tokens per request.",
)
parser.add_argument(
"--output-len",
type=int,
default=128,
help="Number of output tokens per request.",
)
parser.add_argument(
"--num-images",
type=int,
default=1,
help="Number of images per prompt.",
)
parser.add_argument(
"--image-width",
type=int,
default=256,
help="Width of generated images in pixels.",
)
parser.add_argument(
"--image-height",
type=int,
default=256,
help="Height of generated images in pixels.",
)
from vllm.benchmarks.datasets import add_random_dataset_args
# Add random dataset arguments (random, random-mm, random-rerank)
add_random_dataset_args(parser)
parser.add_argument(
"--output-json",
@@ -414,11 +396,8 @@ def main(args: argparse.Namespace) -> None:
result["config"] = {
"model": args.model,
"num_prompts": args.num_prompts,
"input_len": args.input_len,
"output_len": args.output_len,
"num_images": args.num_images,
"image_width": args.image_width,
"image_height": args.image_height,
"input_len": getattr(args, "random_input_len", None),
"output_len": getattr(args, "random_output_len", None),
}
result["timestamp"] = datetime.now().isoformat()

View File

@@ -24,10 +24,13 @@ from vllm.benchmarks.datasets import (
MultiModalConversationDataset,
PrefixRepetitionRandomDataset,
RandomDataset,
RandomDatasetForReranking,
RandomMultiModalDataset,
SampleRequest,
ShareGPTDataset,
SonnetDataset,
VisionArenaDataset,
add_random_dataset_args,
)
from vllm.benchmarks.lib.utils import convert_to_pytorch_benchmark_format, write_to_json
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
@@ -351,7 +354,13 @@ def get_requests(args, tokenizer):
and args.dataset_name not in {"prefix_repetition", "random-mm", "random-rerank"}
):
sample_kwargs["range_ratio"] = args.random_range_ratio
sample_kwargs["prefix_len"] = args.prefix_len
# prefer random_* arguments, fall back to regular arguments
random_prefix_len = getattr(args, "random_prefix_len", None)
sample_kwargs["prefix_len"] = (
random_prefix_len if random_prefix_len is not None else args.prefix_len
)
random_input_len = getattr(args, "random_input_len", None)
sample_kwargs["input_len"] = (
random_input_len if random_input_len is not None else args.input_len
)
random_output_len = getattr(args, "random_output_len", None)
sample_kwargs["output_len"] = (
random_output_len if random_output_len is not None else args.output_len
)
dataset_cls = RandomDataset
elif args.dataset_name == "sharegpt":
dataset_cls = ShareGPTDataset
@@ -395,6 +404,39 @@ def get_requests(args, tokenizer):
sample_kwargs["suffix_len"] = args.prefix_repetition_suffix_len
sample_kwargs["num_prefixes"] = args.prefix_repetition_num_prefixes
sample_kwargs["output_len"] = args.prefix_repetition_output_len
elif args.dataset_name == "random-mm":
dataset_cls = RandomMultiModalDataset
# prefer random_* arguments, fall back to regular arguments
random_input_len = getattr(args, "random_input_len", None)
sample_kwargs["input_len"] = (
random_input_len
if random_input_len is not None
else getattr(args, "input_len", None)
)
random_output_len = getattr(args, "random_output_len", None)
sample_kwargs["output_len"] = (
random_output_len
if random_output_len is not None
else getattr(args, "output_len", None)
)
sample_kwargs["base_items_per_request"] = getattr(
args, "random_mm_base_items_per_request", None
)
sample_kwargs["num_mm_items_range_ratio"] = getattr(
args, "random_mm_num_mm_items_range_ratio", None
)
sample_kwargs["limit_mm_per_prompt"] = getattr(
args, "random_mm_limit_mm_per_prompt", None
)
sample_kwargs["bucket_config"] = getattr(
args, "random_mm_bucket_config", None
)
sample_kwargs["enable_multimodal_chat"] = True
random_prefix_len = getattr(args, "random_prefix_len", None)
sample_kwargs["prefix_len"] = (
random_prefix_len if random_prefix_len is not None else args.prefix_len
)
sample_kwargs["range_ratio"] = args.random_range_ratio
elif args.dataset_name == "random-rerank":
dataset_cls = RandomDatasetForReranking
# prefer random_* arguments, fall back to regular arguments
random_input_len = getattr(args, "random_input_len", None)
sample_kwargs["input_len"] = (
random_input_len
if random_input_len is not None
else getattr(args, "input_len", None)
)
random_output_len = getattr(args, "random_output_len", None)
sample_kwargs["output_len"] = (
random_output_len
if random_output_len is not None
else getattr(args, "output_len", None)
)
sample_kwargs["batchsize"] = getattr(args, "random_batch_size", 1)
sample_kwargs["is_reranker"] = not getattr(args, "no_reranker", False)
sample_kwargs["range_ratio"] = args.random_range_ratio
else:
raise ValueError(f"Unknown dataset name: {args.dataset_name}")
# Remove None values
@@ -451,8 +493,11 @@ def validate_args(args):
):
print("When dataset path is not set, it will default to random dataset")
args.dataset_name = "random"
if args.input_len is None:
raise ValueError("input_len must be provided for a random dataset")
random_input_len = getattr(args, "random_input_len", None)
if args.input_len is None and random_input_len is None:
raise ValueError(
"Either --input-len or --random-input-len must be provided for a random dataset"
)
# === Dataset Name Specific Checks ===
# --hf-subset and --hf-split: only used
@@ -485,26 +530,66 @@ def validate_args(args):
else:
raise ValueError(f"{args.dataset_path} is not supported by hf dataset.")
# --random-range-ratio: only used when dataset_name is 'random'
if args.dataset_name != "random" and args.random_range_ratio is not None:
# --random-range-ratio: only used when dataset_name is 'random',
# 'random-mm', or 'random-rerank'
if (
args.dataset_name not in {"random", "random-mm", "random-rerank"}
and args.random_range_ratio is not None
):
warnings.warn(
"--random-range-ratio will be ignored since \
--dataset-name is not 'random'.",
--dataset-name is not 'random', 'random-mm', or 'random-rerank'.",
stacklevel=2,
)
# --prefix-len: only used when dataset_name is 'random', 'sonnet', or not
# --random-batch-size: only used when dataset_name is 'random-rerank'
if args.dataset_name != "random-rerank" and getattr(args, "random_batch_size", None) is not None:
if args.random_batch_size != 1:
warnings.warn(
"--random-batch-size will be ignored since \
--dataset-name is not 'random-rerank'.",
stacklevel=2,
)
# --no-reranker: only used when dataset_name is 'random-rerank'
if args.dataset_name != "random-rerank" and getattr(args, "no_reranker", False):
warnings.warn(
"--no-reranker will be ignored since \
--dataset-name is not 'random-rerank'.",
stacklevel=2,
)
# --prefix-len: only used when dataset_name is 'random', 'random-mm', 'sonnet', or not
# set.
if (
args.dataset_name not in {"random", "sonnet", None}
args.dataset_name not in {"random", "random-mm", "sonnet", None}
and args.prefix_len is not None
):
warnings.warn(
"--prefix-len will be ignored since --dataset-name\
is not 'random', 'sonnet', or not set.",
is not 'random', 'random-mm', 'sonnet', or not set.",
stacklevel=2,
)
# === Random Dataset Argument Conflict Detection ===
# Check for conflicts between regular and random arguments when using random datasets
if args.dataset_name in {"random", "random-mm", "random-rerank"}:
random_input_len = getattr(args, "random_input_len", None)
random_output_len = getattr(args, "random_output_len", None)
random_prefix_len = getattr(args, "random_prefix_len", None)
if args.input_len is not None and random_input_len is not None:
raise ValueError(
"Cannot specify both --input-len and --random-input-len. "
"For random datasets, use only one of them (prefer --random-input-len)."
)
if args.output_len is not None and random_output_len is not None:
raise ValueError(
"Cannot specify both --output-len and --random-output-len. "
"For random datasets, use only one of them (prefer --random-output-len)."
)
if args.prefix_len is not None and random_prefix_len is not None:
raise ValueError(
"Cannot specify both --prefix-len and --random-prefix-len. "
"For random datasets, use only one of them (prefer --random-prefix-len)."
)
# === LoRA Settings ===
if getattr(args, "enable_lora", False) and args.backend != "vllm":
raise ValueError("LoRA benchmarking is only supported for vLLM backend")
@@ -554,7 +639,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
parser.add_argument(
"--dataset-name",
type=str,
choices=["sharegpt", "random", "sonnet", "burstgpt", "hf", "prefix_repetition"],
choices=["sharegpt", "random", "sonnet", "burstgpt", "hf", "prefix_repetition", "random-mm", "random-rerank"],
help="Name of the dataset to benchmark on.",
default="sharegpt",
)
@@ -574,14 +659,20 @@ def add_cli_args(parser: argparse.ArgumentParser):
"--input-len",
type=int,
default=None,
help="Input prompt length for each request",
help="Input prompt length for each request. "
"For random datasets (random, random-mm, random-rerank), either this "
"argument or the corresponding --random-input-len argument can be used, "
"but not both.",
)
parser.add_argument(
"--output-len",
type=int,
default=None,
help="Output length for each request. Overrides the "
"output length from the dataset.",
"output length from the dataset. "
"For random datasets (random, random-mm, random-rerank), either this "
"argument or the corresponding --random-output-len argument can be used, "
"but not both.",
)
parser.add_argument(
"--n", type=int, default=1, help="Number of generated sequences per prompt."
@@ -634,17 +725,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
type=int,
default=0,
help="Number of fixed prefix tokens before the random "
"context in a request (default: 0).",
)
# random dataset
parser.add_argument(
"--random-range-ratio",
type=float,
default=0.0,
help="Range ratio for sampling input/output length, "
"used only for RandomDataset. Must be in the range [0, 1) to define "
"a symmetric sampling range "
"[length * (1 - range_ratio), length * (1 + range_ratio)].",
"context in a request (default: 0). "
"For random datasets (random, random-mm, random-rerank), either this "
"argument or the corresponding --random-prefix-len argument can be used, "
"but not both.",
)
# hf dataset
@@ -694,6 +778,9 @@ def add_cli_args(parser: argparse.ArgumentParser):
"repetition dataset.",
)
# Add random dataset arguments (random, random-mm, random-rerank)
add_random_dataset_args(parser)
parser = AsyncEngineArgs.add_cli_args(parser)
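
The repeated getattr fallbacks in get_requests all encode one precedence rule: a --random-* value wins when it is set, otherwise the plain --input-len / --output-len / --prefix-len value is used (and validate_args rejects specifying both). Below is a minimal sketch of that rule as a standalone helper; the name resolve_random_arg is hypothetical and not part of this commit, which inlines the logic at each call site:

import argparse


def resolve_random_arg(args: argparse.Namespace, name: str):
    """Prefer args.random_<name> when set, otherwise fall back to args.<name>."""
    random_value = getattr(args, f"random_{name}", None)
    if random_value is not None:
        return random_value
    return getattr(args, name, None)


# --random-input-len overrides --input-len; unset pairs fall through to None.
ns = argparse.Namespace(
    input_len=1024,
    random_input_len=2048,
    output_len=None,
    random_output_len=None,
)
assert resolve_random_arg(ns, "input_len") == 2048
assert resolve_random_arg(ns, "output_len") is None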