From 1994de99ea0bf8dd84257a19800f4f62526a7edf Mon Sep 17 00:00:00 2001
From: Huamin Li <3ericli@gmail.com>
Date: Thu, 30 Oct 2025 05:27:53 -0700
Subject: [PATCH] [CI Failure] Fix test_kv_cache_model_load_and_run (#27717)

Signed-off-by: Huamin Li <3ericli@gmail.com>
---
 tests/quantization/test_fp8.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py
index 7f863a169d5f9..bb3572752d9e2 100644
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -49,7 +49,18 @@ def test_model_load_and_run(
 
 KV_CACHE_MODELS = [
     # AutoFP8 format using separate .k_scale and .v_scale
-    "nm-testing/Qwen2-1.5B-Instruct-FP8-K-V",
+    # The original checkpoint below was removed from the Hub. To unblock CI and
+    # until a small replacement with split K/V scales is found, skip this case.
+    # See PR #27717 for context.
+    pytest.param(
+        "nm-testing/Qwen2-1.5B-Instruct-FP8-K-V",
+        marks=pytest.mark.skip(
+            reason=(
+                "Checkpoint removed from HF; temporarily disabling this "
+                "AutoFP8 split K/V case (PR #27717)."
+            )
+        ),
+    ),
 ]
 
 