[CI/Build][AMD] Add check for flash_attn_varlen_func to test_tree_attention.py (#29252)

Signed-off-by: Randall Smith <ransmith@amd.com>
Co-authored-by: Randall Smith <ransmith@amd.com>
rasmith 2025-11-22 22:45:08 -06:00 committed by GitHub
parent 71362ffab4
commit 3999442f1c

@@ -3,6 +3,7 @@
import math
import pytest
import torch
from tests.v1.attention.utils import (
@@ -11,9 +12,16 @@ from tests.v1.attention.utils import (
try_get_attention_backend,
)
from vllm.attention.backends.registry import AttentionBackendEnum
from vllm.attention.utils.fa_utils import is_flash_attn_varlen_func_available
from vllm.config import ParallelConfig, SpeculativeConfig
from vllm.v1.attention.backends.utils import CommonAttentionMetadata

if not is_flash_attn_varlen_func_available():
    pytest.skip(
        "This test requires flash_attn_varlen_func, but it's not available.",
        allow_module_level=True,
    )

class MockAttentionLayer(torch.nn.Module):
    _q_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
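
For reference, the guard added here follows a general pytest pattern: calling pytest.skip(..., allow_module_level=True) at import time skips the entire module during collection rather than raising a collection error. A minimal, self-contained sketch of the same pattern, using a hypothetical has_optional_kernel() probe in place of is_flash_attn_varlen_func_available() (flash_attn is assumed here purely as an example of an optional dependency):

    # sketch of a module-level skip guard, mirroring the change in this commit
    import pytest


    def has_optional_kernel() -> bool:
        # Stand-in availability probe; the vLLM test uses
        # vllm.attention.utils.fa_utils.is_flash_attn_varlen_func_available().
        try:
            import flash_attn  # noqa: F401  (assumed optional dependency)
            return True
        except ImportError:
            return False


    if not has_optional_kernel():
        # allow_module_level=True permits pytest.skip() outside a test
        # function, so the whole module is skipped at collection time.
        pytest.skip(
            "Optional kernel not available on this platform.",
            allow_module_level=True,
        )


    def test_needs_optional_kernel():
        # Only collected and run when the optional dependency is present.
        assert has_optional_kernel()

On platforms without the dependency (e.g. ROCm builds lacking flash_attn_varlen_func), pytest reports the module as skipped instead of failing during import.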