From f16356fe361df9ccef3a9b46d6a43d43a854e2e0 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Fri, 5 Dec 2025 02:26:52 -0800 Subject: [PATCH] [bench] Support common prefix len config (for decode-only bench) (#29934) Signed-off-by: Ming Yang --- vllm/benchmarks/datasets.py | 1 + vllm/benchmarks/serve.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index ec9b0fd6e969c..638ece26071ef 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -1842,6 +1842,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: random_seed=args.seed, dataset_path=args.dataset_path, disable_shuffle=args.disable_shuffle, + prefix_len=args.common_prefix_len, ).sample( tokenizer=tokenizer, num_requests=args.num_prompts, diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 2933f5d01b274..890cd7e089fd6 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -1221,6 +1221,12 @@ def add_cli_args(parser: argparse.ArgumentParser): help="Repetition penalty sampling parameter. Only has effect on " "openai-compatible backends.", ) + sampling_group.add_argument( + "--common-prefix-len", + type=int, + default=None, + help="Common prefix length shared by all prompts (used by the random dataset).", + ) parser.add_argument( "--tokenizer-mode",