diff --git a/vllm/benchmarks/serve_multi.py b/vllm/benchmarks/serve_multi.py
new file mode 100644
index 0000000000000..f6f34a27fe2e4
--- /dev/null
+++ b/vllm/benchmarks/serve_multi.py
@@ -0,0 +1,226 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+import contextlib
+import json
+import os
+import shlex
+import signal
+import subprocess
+
+_BAD_PARAMS_TYPE_MSG = (
+    "The parameters to vary should be expressed as a JSON list of dictionaries."
+)
+
+
+def _validate_combs(params: list[dict[str, object]]):
+    if not isinstance(params, list):
+        raise TypeError(f"{_BAD_PARAMS_TYPE_MSG} Found JSON type {type(params)}")
+
+    for comb in params:
+        if not isinstance(comb, dict):
+            raise TypeError(f"{_BAD_PARAMS_TYPE_MSG} Found item type {type(comb)}")
+
+    return params
+
+
+def _iter_cmd_key_candidates(param_key: str):
+    # We prefer "-" over "_", but the user-provided command may contain "_"
+    yield "--" + param_key.replace("_", "-")
+    yield "--" + param_key.replace("-", "_")
+    yield "--" + param_key
+
+
+def _override_args(cmd: list[str], params: dict[str, object]):
+    cmd = list(cmd)
+
+    for k, v in params.items():
+        for k_candidate in _iter_cmd_key_candidates(k):
+            try:
+                k_idx = cmd.index(k_candidate)
+                cmd[k_idx + 1] = str(v)
+
+                break
+            except ValueError:
+                continue
+        else:
+            cmd.extend([next(_iter_cmd_key_candidates(k)), str(v)])
+
+    return cmd
+
+
+def _get_result_dir(output_dir: str, params: dict[str, object]):
+    return os.path.join(
+        output_dir,
+        "_".join(f"{k}={v}" for k, v in params.items()) + ".json",
+    )
+
+
+def _get_result_path(result_dir: str, run_number: int):
+    return os.path.join(result_dir, f"run={run_number}.json")
+
+
+def benchmark_one_run(
+    serve_cmd: list[str],
+    bench_cmd: list[str],
+    serve_comb: dict[str, object],
+    run_number: int,
+    result_dir: str,
+    dry_run: bool,
+):
+    result_path = _get_result_path(result_dir, run_number)
+
+    server_cmd = _override_args(serve_cmd, serve_comb)
+    benchmark_cmd = [
+        *bench_cmd,
+        "--save-result",
+        "--result-dir",
+        result_dir,
+        "--result-filename",
+        os.path.basename(result_path),
+    ]
+
+    print("=" * 60)
+    print(f"Parameter Combination: {serve_comb}")
+    print(f"Run Number: {run_number}")
+    print(f"Server command: {server_cmd}")
+    print(f"Benchmark command: {benchmark_cmd}")
+    print(f"Output file: {result_path}")
+
+    if not dry_run:
+        # Create new process group for clean termination
+        server_process = subprocess.Popen(server_cmd, start_new_session=True)
+
+        try:
+            subprocess.run(benchmark_cmd, check=True)
+        finally:
+            if server_process.poll() is None:
+                # Process might already be terminated
+                with contextlib.suppress(ProcessLookupError):
+                    # Kill entire process group
+                    os.killpg(os.getpgid(server_process.pid), signal.SIGKILL)
+
+    return result_path
+
+
+def benchmark_one_comb(
+    serve_cmd: list[str],
+    bench_cmd: list[str],
+    serve_comb: dict[str, object],
+    output_dir: str,
+    num_runs: int,
+    dry_run: bool,
+):
+    result_dir = _get_result_dir(output_dir, serve_comb)
+    if not dry_run:
+        os.makedirs(result_dir, exist_ok=True)
+
+    result_data = list[dict[str, object]]()
+    for run_number in range(num_runs):
+        result_path = benchmark_one_run(
+            serve_cmd=serve_cmd,
+            bench_cmd=bench_cmd,
+            serve_comb=serve_comb,
+            run_number=run_number,
+            result_dir=result_dir,
+            dry_run=dry_run,
+        )
+
+        if not dry_run:
+            with open(result_path, "rb") as f:
+                run_data = json.load(f)
+
+            run_data.update(serve_comb)
+            run_data["run_number"] = run_number
+
+            result_data.append(run_data)
+
+    if not dry_run:
+        with open(os.path.join(result_dir, "summary.json"), "w") as f:
+            json.dump(result_data, f)
+
+
+def benchmark_all(
+    serve_cmd: list[str],
+    bench_cmd: list[str],
+    serve_params: list[dict[str, object]],
+    output_dir: str,
+    num_runs: int,
+    dry_run: bool,
+):
+    for serve_comb in _validate_combs(serve_params):
+        benchmark_one_comb(
+            serve_cmd=serve_cmd,
+            bench_cmd=bench_cmd,
+            serve_comb=serve_comb,
+            output_dir=output_dir,
+            num_runs=num_runs,
+            dry_run=dry_run,
+        )
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Run vLLM server benchmark on a parameter grid of settings."
+    )
+    parser.add_argument(
+        "--serve-cmd",
+        type=str,
+        required=True,
+        help="The command used to run the server: `vllm serve ...`",
+    )
+    parser.add_argument(
+        "--bench-cmd",
+        type=str,
+        required=True,
+        help="The command used to run the benchmark: `vllm bench serve ...`",
+    )
+    parser.add_argument(
+        "--serve-params",
+        type=str,
+        default=None,
+        help="Path to JSON file containing parameter combinations for the "
+        "`vllm serve` command.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-dir",
+        type=str,
+        default="results",
+        help="The directory to which results are written.",
+    )
+    parser.add_argument(
+        "--num-runs",
+        type=int,
+        default=3,
+        help="Number of runs per parameter combination.",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="If set, only prints the commands to run.",
+    )
+
+    args = parser.parse_args()
+
+    serve_cmd = shlex.split(args.serve_cmd)
+    bench_cmd = shlex.split(args.bench_cmd)
+
+    if args.serve_params:
+        with open(args.serve_params, "rb") as f:
+            serve_params = json.load(f)
+    else:
+        serve_params = [{}]
+
+    benchmark_all(
+        serve_cmd=serve_cmd,
+        bench_cmd=bench_cmd,
+        serve_params=serve_params,
+        output_dir=args.output_dir,
+        num_runs=args.num_runs,
+        dry_run=args.dry_run,
+    )
+
+
+if __name__ == "__main__":
+    main()
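
Usage notes (not part of the patch). The file passed via --serve-params is expected to be a JSON list of dictionaries, one dictionary per server configuration; this is exactly what _validate_combs enforces. A minimal sketch of generating such a file, using illustrative parameter names rather than a schema prescribed by the script:

    import json

    # Each dict is one `vllm serve` configuration to benchmark. Keys may use
    # either "-" or "_"; _iter_cmd_key_candidates tries both spellings when
    # matching them against the serve command.
    serve_params = [
        {"max-num-seqs": 128, "max-num-batched-tokens": 4096},
        {"max-num-seqs": 256, "max-num-batched-tokens": 8192},
    ]

    with open("serve_params.json", "w") as f:
        json.dump(serve_params, f, indent=2)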
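
The keys in each combination are merged into the serve command by _override_args: flags already present in the command are overridden in place (trying both "-" and "_" spellings), and flags that are absent are appended. A small sketch of that behavior; the import path assumes this patch has been applied, and the model name is a placeholder:

    from vllm.benchmarks.serve_multi import _override_args

    serve_cmd = ["vllm", "serve", "meta-llama/Llama-3.1-8B", "--max-num-seqs", "128"]

    # An existing flag is overridden in place (underscores map to dashes).
    print(_override_args(serve_cmd, {"max_num_seqs": 256}))
    # ['vllm', 'serve', 'meta-llama/Llama-3.1-8B', '--max-num-seqs', '256']

    # A flag not present in the command is appended instead.
    print(_override_args(serve_cmd, {"gpu-memory-utilization": 0.8}))
    # ['vllm', 'serve', 'meta-llama/Llama-3.1-8B', '--max-num-seqs', '128',
    #  '--gpu-memory-utilization', '0.8']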
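
A hypothetical end-to-end invocation, sketched in Python so the examples stay in one language; the model name, the dataset flags, and the assumption that the module is runnable via `python -m vllm.benchmarks.serve_multi` are illustrative rather than taken from this patch:

    import subprocess

    # Sweep the configurations in serve_params.json, running each one 3 times.
    subprocess.run(
        [
            "python", "-m", "vllm.benchmarks.serve_multi",
            "--serve-cmd", "vllm serve meta-llama/Llama-3.1-8B",
            "--bench-cmd",
            "vllm bench serve --model meta-llama/Llama-3.1-8B "
            "--dataset-name random --num-prompts 100",
            "--serve-params", "serve_params.json",
            "-o", "results",
            "--num-runs", "3",
        ],
        check=True,
    )

Per the code in the patch, each parameter combination gets its own directory under the output directory, holding one run=N.json file per run plus a summary.json that aggregates the runs with the combination and run number merged in.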