[ci] try to fix flaky multi-step tests (#11894)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2026-07-06 17:37:15 +08:00 · 2025-01-09 22:47:29 +08:00 · 2025-01-09 22:47:29 +08:00 · bd82872211
commit bd82872211
parent 405eb8e396
2 changed files with 8 additions and 4 deletions
--- a/tests/multi_step/test_correctness_async_llm.py
+++ b/tests/multi_step/test_correctness_async_llm.py
@ -16,7 +16,6 @@ NUM_SCHEDULER_STEPS = [8]  # Multi-step decoding steps
 NUM_PROMPTS = [10]

 DEFAULT_SERVER_ARGS: List[str] = [
-    "--disable-log-requests",
    "--worker-use-ray",
    "--gpu-memory-utilization",
    "0.85",
@ -110,7 +109,7 @@ async def test_multi_step(

    # Spin up client/server & issue completion API requests.
    # Default `max_wait_seconds` is 240 but was empirically
-    # was raised 3x to 720 *just for this test* due to
+    # raised 5x to 1200 *just for this test* due to
    # observed timeouts in GHA CI
    ref_completions = await completions_with_server_args(
        prompts,
--- a/tests/utils.py
+++ b/tests/utils.py
@ -157,13 +157,19 @@ class RemoteOpenAIServer:
    def url_for(self, *parts: str) -> str:
        return self.url_root + "/" + "/".join(parts)

-    def get_client(self):
+    def get_client(self, **kwargs):
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = 600
        return openai.OpenAI(
            base_url=self.url_for("v1"),
            api_key=self.DUMMY_API_KEY,
+            max_retries=0,
+            **kwargs,
        )

    def get_async_client(self, **kwargs):
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = 600
        return openai.AsyncOpenAI(base_url=self.url_for("v1"),
                                  api_key=self.DUMMY_API_KEY,
                                  max_retries=0,
@ -780,7 +786,6 @@ async def completions_with_server_args(
    assert len(max_tokens) == len(prompts)

    outputs = None
-    max_wait_seconds = 240 * 3  # 240 is default
    with RemoteOpenAIServer(model_name,
                            server_cli_args,
                            max_wait_seconds=max_wait_seconds) as server: