From eea178398997b94b6df5560b597e2b7895b2ae3a Mon Sep 17 00:00:00 2001 From: Lucia Fang <116399278+luccafong@users.noreply.github.com> Date: Mon, 22 Sep 2025 20:21:48 -0700 Subject: [PATCH] [benchmarks]allow skip ready check for bench serve (#25420) Signed-off-by: Lu Fang Signed-off-by: Lucia Fang <116399278+luccafong@users.noreply.github.com> Co-authored-by: Lucia (Lu) Fang --- vllm/benchmarks/serve.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 7382782f1165..2a042802d0d5 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -531,18 +531,22 @@ async def benchmark( extra_body=extra_body, ) - test_output = await wait_for_endpoint( - request_func, - test_input, - session, - timeout_seconds=ready_check_timeout_sec, - ) - if not test_output.success: - raise ValueError( - "Initial test run failed - Please make sure benchmark arguments " - f"are correctly specified. Error: {test_output.error}") + if ready_check_timeout_sec > 0: + test_output = await wait_for_endpoint( + request_func, + test_input, + session, + timeout_seconds=ready_check_timeout_sec, + ) + if not test_output.success: + raise ValueError( + "Initial test run failed - Please make sure benchmark " + "arguments are correctly specified. " + f"Error: {test_output.error}") + else: + print("Initial test run completed. Starting main benchmark run...") else: - print("Initial test run completed. Starting main benchmark run...") + print("Skipping endpoint ready check.") if lora_modules: # For each input request, choose a LoRA module at random. @@ -1151,7 +1155,8 @@ def add_cli_args(parser: argparse.ArgumentParser): type=int, default=600, help="Maximum time to wait for the endpoint to become ready " - "in seconds (default: 600 seconds / 10 minutes).", + "in seconds (default: 600 seconds / 10 minutes). If set to 0, " + "the ready check will be skipped." )