[Bugfix] [ROCm] [UX]: revert Flex attention backend (#29371)

Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
Author: vllmellm, 2025-11-25 14:56:06 +08:00 (committed by GitHub)
parent 7992324f23
commit 64deead719
2 changed files with 10 additions and 0 deletions


@@ -36,6 +36,12 @@ def mock_on_gfx9():
 @pytest.mark.parametrize(
     "env_vars, selected_backend, expected_backend_path",
     [
+        # Test Case: Explicit FLEX_ATTENTION backend
+        (
+            {},
+            "FLEX_ATTENTION",
+            AttentionBackendEnum.FLEX_ATTENTION.get_path(),
+        ),
         # Test Case 1: Default (no env vars, no explicit backend)
         (
             {},

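For context, a parametrized case like the restored FLEX_ATTENTION entry above is normally driven by applying its env_vars mapping before calling the platform's backend selector. A minimal, self-contained sketch of that pattern follows; resolve_backend, the test name, and the case values are illustrative stand-ins, not the actual helpers used in this test file:

import pytest


def resolve_backend(selected_backend: str) -> str:
    # Stand-in for the real platform selector; it simply echoes the request so
    # the sketch runs on its own.
    return selected_backend


@pytest.mark.parametrize(
    "env_vars, selected_backend, expected_backend",
    [
        # Explicit FLEX_ATTENTION request with no extra environment.
        ({}, "FLEX_ATTENTION", "FLEX_ATTENTION"),
    ],
)
def test_backend_selection_pattern(monkeypatch, env_vars, selected_backend, expected_backend):
    # Apply the per-case environment before exercising the selector.
    for name, value in env_vars.items():
        monkeypatch.setenv(name, value)
    assert resolve_backend(selected_backend) == expected_backend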

@@ -262,6 +262,10 @@ class RocmPlatform(Platform):
                 f"is not MLA type while requested for MLA backend."
             )
+        if selected_backend == AttentionBackendEnum.FLEX_ATTENTION:
+            logger.info("Using FlexAttention backend.")
+            return AttentionBackendEnum.FLEX_ATTENTION.get_path()
         if selected_backend == AttentionBackendEnum.TRITON_ATTN:
             logger.info("Using Triton Attention backend on V1 engine.")
             return AttentionBackendEnum.TRITON_ATTN.get_path()
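With the FLEX_ATTENTION branch restored in RocmPlatform, the backend can again be requested explicitly on ROCm. A minimal usage sketch, assuming a working ROCm build of vLLM; the model name is purely illustrative, and VLLM_ATTENTION_BACKEND is vLLM's existing backend-selection environment variable, set here before vLLM is imported:

import os

# Ask for the FlexAttention backend explicitly; set this before any vLLM import
# so the platform selector sees it.
os.environ["VLLM_ATTENTION_BACKEND"] = "FLEX_ATTENTION"

from vllm import LLM

llm = LLM(model="facebook/opt-125m")  # illustrative model, not tied to this commit
outputs = llm.generate(["Hello, world"])
print(outputs[0].outputs[0].text)

If the restored branch is taken, the platform logs "Using FlexAttention backend." and returns AttentionBackendEnum.FLEX_ATTENTION.get_path(), as shown in the hunk above.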