[Bugfix] [ROCm] [UX]: revert Flex attention backend (#29371)
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
commit 64deead719
parent 7992324f23
@@ -36,6 +36,12 @@ def mock_on_gfx9():
 @pytest.mark.parametrize(
     "env_vars, selected_backend, expected_backend_path",
     [
+        # Test Case: Explicit FLEX_ATTENTION backend
+        (
+            {},
+            "FLEX_ATTENTION",
+            AttentionBackendEnum.FLEX_ATTENTION.get_path(),
+        ),
         # Test Case 1: Default (no env vars, no explicit backend)
         (
             {},
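For context (not part of the diff): the parametrize matrix above maps a set of env vars plus an explicitly selected backend to the dotted backend path the platform is expected to return. Below is a minimal, self-contained sketch of how such a selection test can be driven with pytest's monkeypatch. The enum values, the resolver `resolve_attn_backend`, and its default are stand-ins for illustration, not vLLM's actual helpers.

    import os
    from enum import Enum
    from typing import Optional

    import pytest


    class AttentionBackendEnum(Enum):
        # Stand-in for vLLM's backend enum; the dotted paths are illustrative.
        FLEX_ATTENTION = "pkg.attention.backends.flex_attention.FlexAttentionBackend"
        TRITON_ATTN = "pkg.attention.backends.triton_attn.TritonAttentionBackend"

        def get_path(self) -> str:
            return self.value


    def resolve_attn_backend(selected_backend: Optional[str]) -> str:
        # Hypothetical resolver: an explicit choice wins, then the env var,
        # then a default backend.
        name = selected_backend or os.environ.get(
            "VLLM_ATTENTION_BACKEND", "TRITON_ATTN"
        )
        return AttentionBackendEnum[name].get_path()


    @pytest.mark.parametrize(
        "env_vars, selected_backend, expected_backend_path",
        [
            # Explicit FLEX_ATTENTION backend
            ({}, "FLEX_ATTENTION", AttentionBackendEnum.FLEX_ATTENTION.get_path()),
            # Default (no env vars, no explicit backend)
            ({}, None, AttentionBackendEnum.TRITON_ATTN.get_path()),
        ],
    )
    def test_backend_selection(
        monkeypatch, env_vars, selected_backend, expected_backend_path
    ):
        for key, value in env_vars.items():
            monkeypatch.setenv(key, value)
        assert resolve_attn_backend(selected_backend) == expected_backend_path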
@@ -262,6 +262,10 @@ class RocmPlatform(Platform):
                 f"is not MLA type while requested for MLA backend."
             )
 
+        if selected_backend == AttentionBackendEnum.FLEX_ATTENTION:
+            logger.info("Using FlexAttention backend.")
+            return AttentionBackendEnum.FLEX_ATTENTION.get_path()
+
         if selected_backend == AttentionBackendEnum.TRITON_ATTN:
             logger.info("Using Triton Attention backend on V1 engine.")
             return AttentionBackendEnum.TRITON_ATTN.get_path()
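The restored branch follows the platform's existing dispatch pattern: compare the requested backend against the enum, log the choice, and return the backend's dotted import path so the class can be imported lazily. A minimal sketch of that pattern, under the same illustrative assumptions as above (the enum values and the `load_backend_class` helper are not vLLM's actual layout):

    import importlib
    import logging
    from enum import Enum

    logger = logging.getLogger(__name__)


    class AttentionBackendEnum(Enum):
        # Illustrative dotted paths; vLLM's real module layout may differ.
        FLEX_ATTENTION = "pkg.attention.backends.flex_attention.FlexAttentionBackend"
        TRITON_ATTN = "pkg.attention.backends.triton_attn.TritonAttentionBackend"

        def get_path(self) -> str:
            return self.value


    def get_attn_backend_path(selected_backend: AttentionBackendEnum) -> str:
        # Mirrors the hunk above: each supported backend gets an explicit
        # branch, so an unsupported request falls through to a clear error.
        if selected_backend == AttentionBackendEnum.FLEX_ATTENTION:
            logger.info("Using FlexAttention backend.")
            return AttentionBackendEnum.FLEX_ATTENTION.get_path()
        if selected_backend == AttentionBackendEnum.TRITON_ATTN:
            logger.info("Using Triton Attention backend on V1 engine.")
            return AttentionBackendEnum.TRITON_ATTN.get_path()
        raise ValueError(f"Unsupported attention backend: {selected_backend}")


    def load_backend_class(path: str):
        # A dotted path like "module.sub.ClassName" is resolved at runtime,
        # deferring the backend import until it is actually selected.
        module_name, _, class_name = path.rpartition(".")
        return getattr(importlib.import_module(module_name), class_name)

Returning a path string instead of the class itself keeps unused backends (and their heavyweight dependencies) out of the import graph until one is chosen.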