From 1994de99ea0bf8dd84257a19800f4f62526a7edf Mon Sep 17 00:00:00 2001
From: Huamin Li <3ericli@gmail.com>
Date: Thu, 30 Oct 2025 05:27:53 -0700
Subject: [PATCH] [CI Failure] Fix test_kv_cache_model_load_and_run (#27717)

Signed-off-by: Huamin Li <3ericli@gmail.com>
---
 tests/quantization/test_fp8.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py
index 7f863a169d5f9..bb3572752d9e2 100644
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -49,7 +49,18 @@ def test_model_load_and_run(
 
 KV_CACHE_MODELS = [
     # AutoFP8 format using separate .k_scale and .v_scale
-    "nm-testing/Qwen2-1.5B-Instruct-FP8-K-V",
+    # The checkpoint below was removed from the Hub. To unblock CI, skip this
+    # case until a small replacement with split K/V scales is found.
+    # See PR #27717 for context.
+    pytest.param(
+        "nm-testing/Qwen2-1.5B-Instruct-FP8-K-V",
+        marks=pytest.mark.skip(
+            reason=(
+                "Checkpoint removed from HF; temporarily disabling this "
+                "AutoFP8 split K/V case (PR #27717)."
+            )
+        ),
+    ),
 ]