[Bugfix] [ROCm] [UX]: revert Flex attention backend (#29371)
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
commit 64deead719
parent 7992324f23
@@ -36,6 +36,12 @@ def mock_on_gfx9():
 @pytest.mark.parametrize(
     "env_vars, selected_backend, expected_backend_path",
     [
+        # Test Case: Explicit FLEX_ATTENTION backend
+        (
+            {},
+            "FLEX_ATTENTION",
+            AttentionBackendEnum.FLEX_ATTENTION.get_path(),
+        ),
         # Test Case 1: Default (no env vars, no explicit backend)
         (
             {},
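For context (not part of the diff): the parametrize matrix above maps a set of env vars plus an explicitly selected backend to the dotted backend path the platform is expected to return. Below is a minimal, self-contained sketch of how such a selection test can be driven with pytest's monkeypatch. The enum values, the resolver `resolve_attn_backend`, and its default are stand-ins for illustration, not vLLM's actual helpers.

    import os
    from enum import Enum
    from typing import Optional

    import pytest


    class AttentionBackendEnum(Enum):
        # Stand-in for vLLM's backend enum; the dotted paths are illustrative.
        FLEX_ATTENTION = "pkg.attention.backends.flex_attention.FlexAttentionBackend"
        TRITON_ATTN = "pkg.attention.backends.triton_attn.TritonAttentionBackend"

        def get_path(self) -> str:
            return self.value


    def resolve_attn_backend(selected_backend: Optional[str]) -> str:
        # Hypothetical resolver: an explicit choice wins, then the env var,
        # then a default backend.
        name = selected_backend or os.environ.get(
            "VLLM_ATTENTION_BACKEND", "TRITON_ATTN"
        )
        return AttentionBackendEnum[name].get_path()


    @pytest.mark.parametrize(
        "env_vars, selected_backend, expected_backend_path",
        [
            # Explicit FLEX_ATTENTION backend
            ({}, "FLEX_ATTENTION", AttentionBackendEnum.FLEX_ATTENTION.get_path()),
            # Default (no env vars, no explicit backend)
            ({}, None, AttentionBackendEnum.TRITON_ATTN.get_path()),
        ],
    )
    def test_backend_selection(
        monkeypatch, env_vars, selected_backend, expected_backend_path
    ):
        for key, value in env_vars.items():
            monkeypatch.setenv(key, value)
        assert resolve_attn_backend(selected_backend) == expected_backend_path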
@@ -262,6 +262,10 @@ class RocmPlatform(Platform):
                 f"is not MLA type while requested for MLA backend."
             )
 
+        if selected_backend == AttentionBackendEnum.FLEX_ATTENTION:
+            logger.info("Using FlexAttention backend.")
+            return AttentionBackendEnum.FLEX_ATTENTION.get_path()
+
         if selected_backend == AttentionBackendEnum.TRITON_ATTN:
             logger.info("Using Triton Attention backend on V1 engine.")
             return AttentionBackendEnum.TRITON_ATTN.get_path()
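The restored branch follows the platform's existing dispatch pattern: compare the requested backend against the enum, log the choice, and return the backend's dotted import path so the class can be imported lazily. A minimal sketch of that pattern, under the same illustrative assumptions as above (the enum values and the `load_backend_class` helper are not vLLM's actual layout):

    import importlib
    import logging
    from enum import Enum

    logger = logging.getLogger(__name__)


    class AttentionBackendEnum(Enum):
        # Illustrative dotted paths; vLLM's real module layout may differ.
        FLEX_ATTENTION = "pkg.attention.backends.flex_attention.FlexAttentionBackend"
        TRITON_ATTN = "pkg.attention.backends.triton_attn.TritonAttentionBackend"

        def get_path(self) -> str:
            return self.value


    def get_attn_backend_path(selected_backend: AttentionBackendEnum) -> str:
        # Mirrors the hunk above: each supported backend gets an explicit
        # branch, so an unsupported request falls through to a clear error.
        if selected_backend == AttentionBackendEnum.FLEX_ATTENTION:
            logger.info("Using FlexAttention backend.")
            return AttentionBackendEnum.FLEX_ATTENTION.get_path()
        if selected_backend == AttentionBackendEnum.TRITON_ATTN:
            logger.info("Using Triton Attention backend on V1 engine.")
            return AttentionBackendEnum.TRITON_ATTN.get_path()
        raise ValueError(f"Unsupported attention backend: {selected_backend}")


    def load_backend_class(path: str):
        # A dotted path like "module.sub.ClassName" is resolved at runtime,
        # deferring the backend import until it is actually selected.
        module_name, _, class_name = path.rpartition(".")
        return getattr(importlib.import_module(module_name), class_name)

Returning a path string instead of the class itself keeps unused backends (and their heavyweight dependencies) out of the import graph until one is chosen.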