[benchmarks] Allow skipping the ready check for bench serve (#25420)

Signed-off-by: Lu Fang <fanglu@fb.com>
Signed-off-by: Lucia Fang <116399278+luccafong@users.noreply.github.com>
Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com>
2025-12-09 22:25:32 +08:00 · 2025-09-22 20:21:48 -07:00 · 2025-09-22 20:21:48 -07:00 · eea1783989
commit eea1783989
parent f225ea7dd9
1 changed file with 17 additions and 12 deletions
--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -531,18 +531,22 @@ async def benchmark(
        extra_body=extra_body,
    )

-    test_output = await wait_for_endpoint(
-        request_func,
-        test_input,
-        session,
-        timeout_seconds=ready_check_timeout_sec,
-    )
-    if not test_output.success:
-        raise ValueError(
-            "Initial test run failed - Please make sure benchmark arguments "
-            f"are correctly specified. Error: {test_output.error}")
+    if ready_check_timeout_sec > 0:
+        test_output = await wait_for_endpoint(
+            request_func,
+            test_input,
+            session,
+            timeout_seconds=ready_check_timeout_sec,
+        )
+        if not test_output.success:
+            raise ValueError(
+                "Initial test run failed - Please make sure benchmark "
+                "arguments are correctly specified. "
+                f"Error: {test_output.error}")
+        else:
+            print("Initial test run completed. Starting main benchmark run...")
    else:
-        print("Initial test run completed. Starting main benchmark run...")
+        print("Skipping endpoint ready check.")

    if lora_modules:
        # For each input request, choose a LoRA module at random.
@@ -1151,7 +1155,8 @@ def add_cli_args(parser: argparse.ArgumentParser):
        type=int,
        default=600,
        help="Maximum time to wait for the endpoint to become ready "
-        "in seconds (default: 600 seconds / 10 minutes).",
+        "in seconds (default: 600 seconds / 10 minutes). If set to 0, "
+        "the ready check will be skipped."
    )