diff --git a/vllm/benchmarks/serve_multi.py b/vllm/benchmarks/serve_multi.py
new file mode 100644
index 0000000000000..f6f34a27fe2e4
--- /dev/null
+++ b/vllm/benchmarks/serve_multi.py
@@ -0,0 +1,226 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+import contextlib
+import json
+import os
+import shlex
+import signal
+import subprocess
+
+_BAD_PARAMS_TYPE_MSG = (
+    "The parameters to vary should be expressed as a JSON list of dictionaries."
+)
+
+
+def _validate_combs(params: list[dict[str, object]]):
+    if not isinstance(params, list):
+        raise TypeError(f"{_BAD_PARAMS_TYPE_MSG} Found JSON type {type(params)}")
+
+    for comb in params:
+        if not isinstance(comb, dict):
+            raise TypeError(f"{_BAD_PARAMS_TYPE_MSG} Found item type {type(comb)}")
+
+    return params
+
+
+def _iter_cmd_key_candidates(param_key: str):
+    # We prefer "-" over "_", but the user-provided command may contain "_"
+    yield "--" + param_key.replace("_", "-")
+    yield "--" + param_key.replace("-", "_")
+    yield "--" + param_key
+
+
+def _override_args(cmd: list[str], params: dict[str, object]):
+    cmd = list(cmd)
+
+    for k, v in params.items():
+        for k_candidate in _iter_cmd_key_candidates(k):
+            try:
+                k_idx = cmd.index(k_candidate)
+                cmd[k_idx + 1] = str(v)
+
+                break
+            except ValueError:
+                continue
+        else:
+            cmd.extend([next(_iter_cmd_key_candidates(k)), str(v)])
+
+    return cmd
+
+
+def _get_result_dir(output_dir: str, params: dict[str, object]):
+    return os.path.join(
+        output_dir,
+        "_".join(f"{k}={v}" for k, v in params.items()) + ".json",
+    )
+
+
+def _get_result_path(result_dir: str, run_number: int):
+    return os.path.join(result_dir, f"run={run_number}.json")
+
+
+def benchmark_one_run(
+    serve_cmd: list[str],
+    bench_cmd: list[str],
+    serve_comb: dict[str, object],
+    run_number: int,
+    result_dir: str,
+    dry_run: bool,
+):
+    result_path = _get_result_path(result_dir, run_number)
+
+    server_cmd = _override_args(serve_cmd, serve_comb)
+    benchmark_cmd = [
+        *bench_cmd,
+        "--save-result",
+        "--result-dir",
+        result_dir,
+        "--result-filename",
+        os.path.basename(result_path),
+    ]
+
+    print("=" * 60)
+    print(f"Parameter Combination: {serve_comb}")
+    print(f"Run Number: {run_number}")
+    print(f"Server command: {server_cmd}")
+    print(f"Benchmark command: {benchmark_cmd}")
+    print(f"Output file: {result_path}")
+
+    if not dry_run:
+        # Create new process group for clean termination
+        server_process = subprocess.Popen(server_cmd, start_new_session=True)
+
+        try:
+            subprocess.run(benchmark_cmd, check=True)
+        finally:
+            if server_process.poll() is None:
+                # Process might already be terminated
+                with contextlib.suppress(ProcessLookupError):
+                    # Kill entire process group
+                    os.killpg(os.getpgid(server_process.pid), signal.SIGKILL)
+
+    return result_path
+
+
+def benchmark_one_comb(
+    serve_cmd: list[str],
+    bench_cmd: list[str],
+    serve_comb: dict[str, object],
+    output_dir: str,
+    num_runs: int,
+    dry_run: bool,
+):
+    result_dir = _get_result_dir(output_dir, serve_comb)
+    if not dry_run:
+        os.makedirs(result_dir, exist_ok=True)
+
+    result_data = list[dict[str, object]]()
+    for run_number in range(num_runs):
+        result_path = benchmark_one_run(
+            serve_cmd=serve_cmd,
+            bench_cmd=bench_cmd,
+            serve_comb=serve_comb,
+            run_number=run_number,
+            result_dir=result_dir,
+            dry_run=dry_run,
+        )
+
+        if not dry_run:
+            with open(result_path, "rb") as f:
+                run_data = json.load(f)
+
+            run_data.update(serve_comb)
+            run_data["run_number"] = run_number
+
+            result_data.append(run_data)
+
+    if not dry_run:
+        with open(os.path.join(result_dir, "summary.json"), "w") as f:
+            json.dump(result_data, f)
+
+
+def benchmark_all(
+    serve_cmd: list[str],
+    bench_cmd: list[str],
+    serve_params: list[dict[str, object]],
+    output_dir: str,
+    num_runs: int,
+    dry_run: bool,
+):
+    for serve_comb in _validate_combs(serve_params):
+        benchmark_one_comb(
+            serve_cmd=serve_cmd,
+            bench_cmd=bench_cmd,
+            serve_comb=serve_comb,
+            output_dir=output_dir,
+            num_runs=num_runs,
+            dry_run=dry_run,
+        )
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Run vLLM server benchmark on a parameter grid of settings."
+    )
+    parser.add_argument(
+        "--serve-cmd",
+        type=str,
+        required=True,
+        help="The command used to run the server: `vllm serve ...`",
+    )
+    parser.add_argument(
+        "--bench-cmd",
+        type=str,
+        required=True,
+        help="The command used to run the benchmark: `vllm bench serve ...`",
+    )
+    parser.add_argument(
+        "--serve-params",
+        type=str,
+        default=None,
+        help="Path to JSON file containing parameter combinations for the "
+        "`vllm serve` command.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-dir",
+        type=str,
+        default="results",
+        help="The directory to which results are written.",
+    )
+    parser.add_argument(
+        "--num-runs",
+        type=int,
+        default=3,
+        help="Number of runs per parameter combination.",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="If set, only prints the commands to run.",
+    )
+
+    args = parser.parse_args()
+
+    serve_cmd = shlex.split(args.serve_cmd)
+    bench_cmd = shlex.split(args.bench_cmd)
+
+    if args.serve_params:
+        with open(args.serve_params, "rb") as f:
+            serve_params = json.load(f)
+    else:
+        serve_params = [{}]
+
+    benchmark_all(
+        serve_cmd=serve_cmd,
+        bench_cmd=bench_cmd,
+        serve_params=serve_params,
+        output_dir=args.output_dir,
+        num_runs=args.num_runs,
+        dry_run=args.dry_run,
+    )
+
+
+if __name__ == "__main__":
+    main()
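
Usage notes (not part of the patch). The file passed via --serve-params is expected to be a JSON list of dictionaries, one dictionary per server configuration; this is exactly what _validate_combs enforces. A minimal sketch of generating such a file, using illustrative parameter names rather than a schema prescribed by the script:

    import json

    # Each dict is one `vllm serve` configuration to benchmark. Keys may use
    # either "-" or "_"; _iter_cmd_key_candidates tries both spellings when
    # matching them against the serve command.
    serve_params = [
        {"max-num-seqs": 128, "max-num-batched-tokens": 4096},
        {"max-num-seqs": 256, "max-num-batched-tokens": 8192},
    ]

    with open("serve_params.json", "w") as f:
        json.dump(serve_params, f, indent=2)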
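
The keys in each combination are merged into the serve command by _override_args: flags already present in the command are overridden in place (trying both "-" and "_" spellings), and flags that are absent are appended. A small sketch of that behavior; the import path assumes this patch has been applied, and the model name is a placeholder:

    from vllm.benchmarks.serve_multi import _override_args

    serve_cmd = ["vllm", "serve", "meta-llama/Llama-3.1-8B", "--max-num-seqs", "128"]

    # An existing flag is overridden in place (underscores map to dashes).
    print(_override_args(serve_cmd, {"max_num_seqs": 256}))
    # ['vllm', 'serve', 'meta-llama/Llama-3.1-8B', '--max-num-seqs', '256']

    # A flag not present in the command is appended instead.
    print(_override_args(serve_cmd, {"gpu-memory-utilization": 0.8}))
    # ['vllm', 'serve', 'meta-llama/Llama-3.1-8B', '--max-num-seqs', '128',
    #  '--gpu-memory-utilization', '0.8']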
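
A hypothetical end-to-end invocation, sketched in Python so the examples stay in one language; the model name, the dataset flags, and the assumption that the module is runnable via `python -m vllm.benchmarks.serve_multi` are illustrative rather than taken from this patch:

    import subprocess

    # Sweep the configurations in serve_params.json, running each one 3 times.
    subprocess.run(
        [
            "python", "-m", "vllm.benchmarks.serve_multi",
            "--serve-cmd", "vllm serve meta-llama/Llama-3.1-8B",
            "--bench-cmd",
            "vllm bench serve --model meta-llama/Llama-3.1-8B "
            "--dataset-name random --num-prompts 100",
            "--serve-params", "serve_params.json",
            "-o", "results",
            "--num-runs", "3",
        ],
        check=True,
    )

Per the code in the patch, each parameter combination gets its own directory under the output directory, holding one run=N.json file per run plus a summary.json that aggregates the runs with the combination and run number merged in.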