From 3999442f1c1d091dda370bc8cb2022f54b7d805b Mon Sep 17 00:00:00 2001
From: rasmith
Date: Sat, 22 Nov 2025 22:45:08 -0600
Subject: [PATCH] [CI/Build][AMD] Add check for flash_attn_varlen_func to test_tree_attention.py (#29252)

Signed-off-by: Randall Smith
Co-authored-by: Randall Smith
---
 tests/v1/spec_decode/test_tree_attention.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/v1/spec_decode/test_tree_attention.py b/tests/v1/spec_decode/test_tree_attention.py
index 6958d62dc7e90..a4ee53008ce82 100644
--- a/tests/v1/spec_decode/test_tree_attention.py
+++ b/tests/v1/spec_decode/test_tree_attention.py
@@ -3,6 +3,7 @@
 
 import math
 
+import pytest
 import torch
 
 from tests.v1.attention.utils import (
@@ -11,9 +12,16 @@ from tests.v1.attention.utils import (
     try_get_attention_backend,
 )
 from vllm.attention.backends.registry import AttentionBackendEnum
+from vllm.attention.utils.fa_utils import is_flash_attn_varlen_func_available
 from vllm.config import ParallelConfig, SpeculativeConfig
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
 
+if not is_flash_attn_varlen_func_available():
+    pytest.skip(
+        "This test requires flash_attn_varlen_func, but it's not available.",
+        allow_module_level=True,
+    )
+
 
 class MockAttentionLayer(torch.nn.Module):
     _q_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")