From f16356fe361df9ccef3a9b46d6a43d43a854e2e0 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Fri, 5 Dec 2025 02:26:52 -0800 Subject: [PATCH] [bench] Support common prefix len config (for decode-only bench) (#29934) Signed-off-by: Ming Yang --- vllm/benchmarks/datasets.py | 1 + vllm/benchmarks/serve.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index ec9b0fd6e969c..638ece26071ef 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -1842,6 +1842,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: random_seed=args.seed, dataset_path=args.dataset_path, disable_shuffle=args.disable_shuffle, + prefix_len=args.common_prefix_len, ).sample( tokenizer=tokenizer, num_requests=args.num_prompts, diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 2933f5d01b274..890cd7e089fd6 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -1221,6 +1221,12 @@ def add_cli_args(parser: argparse.ArgumentParser): help="Repetition penalty sampling parameter. Only has effect on " "openai-compatible backends.", ) + sampling_group.add_argument( + "--common-prefix-len", + type=int, + default=None, + help="Common prefix length shared by all prompts (used by the random dataset).", + ) parser.add_argument( "--tokenizer-mode",