[CI] Fix async scheduling + spec decoding test flake (#28902)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill 2025-11-17 21:26:32 -08:00 committed by GitHub
parent 0168f69e50
commit 5bdd155277
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -84,6 +84,7 @@ def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
"num_speculative_tokens": 2, "num_speculative_tokens": 2,
"model": "nm-testing/Llama3_2_1B_speculator.eagle3", "model": "nm-testing/Llama3_2_1B_speculator.eagle3",
} }
# Set small draft model len to force doesn't-fit-in-drafter case.
spec_config_short = spec_config | {"max_model_len": 50} spec_config_short = spec_config | {"max_model_len": 50}
# test_preemption, executor, async_scheduling, # test_preemption, executor, async_scheduling,
@@ -174,13 +175,14 @@ def run_tests(
): ):
if "spec_mml=None" in test_config: if "spec_mml=None" in test_config:
assert ( assert (
pytest.approx(test_acceptance_rate, rel=5e-2) test_acceptance_rate > base_acceptance_rate
== base_acceptance_rate or test_acceptance_rate
== pytest.approx(base_acceptance_rate, rel=5e-2)
) )
else: else:
# Currently the reported acceptance rate is expected to be # Currently the reported acceptance rate is expected to be
# lower when we sometimes skip drafting altogether. # lower when we sometimes skip drafting altogether.
assert test_acceptance_rate > 0.05 assert test_acceptance_rate > 0.1
print( print(
f"PASSED: config=[{test_config}], params={params}" f"PASSED: config=[{test_config}], params={params}"
f" accept_rate={test_acceptance_rate}" f" accept_rate={test_acceptance_rate}"