[CI] Fix async scheduling + spec decoding test flake (#28902)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill 2025-11-17 21:26:32 -08:00 committed by GitHub
parent 0168f69e50
commit 5bdd155277
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -84,6 +84,7 @@ def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
"num_speculative_tokens": 2,
"model": "nm-testing/Llama3_2_1B_speculator.eagle3",
}
# Set small draft model len to force doesn't-fit-in-drafter case.
spec_config_short = spec_config | {"max_model_len": 50}
# test_preemption, executor, async_scheduling,
@@ -174,13 +175,14 @@ def run_tests(
):
if "spec_mml=None" in test_config:
assert (
-pytest.approx(test_acceptance_rate, rel=5e-2)
-== base_acceptance_rate
+test_acceptance_rate > base_acceptance_rate
+or test_acceptance_rate
+== pytest.approx(base_acceptance_rate, rel=5e-2)
)
else:
# Currently the reported acceptance rate is expected to be
# lower when we sometimes skip drafting altogether.
-assert test_acceptance_rate > 0.05
+assert test_acceptance_rate > 0.1
print(
f"PASSED: config=[{test_config}], params={params}"
f" accept_rate={test_acceptance_rate}"