mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-02 18:47:03 +08:00
[CI/Build][AMD] Add check for flash_att_varlen_func to test_tree_attention.py (#29252)
Signed-off-by: Randall Smith <ransmith@amd.com> Co-authored-by: Randall Smith <ransmith@amd.com>
This commit is contained in:
parent
71362ffab4
commit
3999442f1c
@ -3,6 +3,7 @@
|
||||
|
||||
import math
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from tests.v1.attention.utils import (
|
||||
@ -11,9 +12,16 @@ from tests.v1.attention.utils import (
|
||||
try_get_attention_backend,
|
||||
)
|
||||
from vllm.attention.backends.registry import AttentionBackendEnum
|
||||
from vllm.attention.utils.fa_utils import is_flash_attn_varlen_func_available
|
||||
from vllm.config import ParallelConfig, SpeculativeConfig
|
||||
from vllm.v1.attention.backends.utils import CommonAttentionMetadata
|
||||
|
||||
# These tree-attention tests depend on flash_attn_varlen_func; when the
# helper reports it is unavailable (e.g. on builds without flash-attn),
# skip the entire module at collection time rather than failing per-test.
if not is_flash_attn_varlen_func_available():
    pytest.skip(
        allow_module_level=True,
        reason="This test requires flash_attn_varlen_func, but it's not available.",
    )
|
||||
|
||||
|
||||
class MockAttentionLayer(torch.nn.Module):
|
||||
_q_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user