[CI] Fix async scheduling + spec decoding test flake (#28902)

Signed-off-by: Nick Hill <nhill@redhat.com>
2026-06-16 12:17:15 +08:00 · 2025-11-17 21:26:32 -08:00 · 2025-11-17 21:26:32 -08:00 · 5bdd155277
commit 5bdd155277
parent 0168f69e50
1 changed file with 5 additions and 3 deletions
--- a/tests/v1/e2e/test_async_scheduling.py
+++ b/tests/v1/e2e/test_async_scheduling.py
@@ -84,6 +84,7 @@ def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
        "num_speculative_tokens": 2,
        "model": "nm-testing/Llama3_2_1B_speculator.eagle3",
    }
+    # Set small draft model len to force doesn't-fit-in-drafter case.
    spec_config_short = spec_config | {"max_model_len": 50}

    # test_preemption, executor, async_scheduling,
@@ -174,13 +175,14 @@ def run_tests(
                ):
                    if "spec_mml=None" in test_config:
                        assert (
-                            pytest.approx(test_acceptance_rate, rel=5e-2)
-                            == base_acceptance_rate
+                            test_acceptance_rate > base_acceptance_rate
+                            or test_acceptance_rate
+                            == pytest.approx(base_acceptance_rate, rel=5e-2)
                        )
                    else:
                        # Currently the reported acceptance rate is expected to be
                        # lower when we sometimes skip drafting altogether.
-                        assert test_acceptance_rate > 0.05
+                        assert test_acceptance_rate > 0.1
                print(
                    f"PASSED: config=[{test_config}], params={params}"
                    f" accept_rate={test_acceptance_rate}"