From 684f2545851ee0ee49be9a80545ed497324f1a96 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni
Date: Tue, 11 Nov 2025 11:13:51 -0600
Subject: [PATCH] Prefer FlashAttention MLA as default over FlashMLA (#27363)

Signed-off-by: Matthew Bonanni
---
 vllm/platforms/cuda.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 43daf5e75b665..22c6dde754d01 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -55,15 +55,15 @@ def _get_backend_priorities(
             return [
                 AttentionBackendEnum.CUTLASS_MLA,
                 AttentionBackendEnum.FLASHINFER_MLA,
-                AttentionBackendEnum.FLASHMLA,
                 AttentionBackendEnum.FLASH_ATTN_MLA,
+                AttentionBackendEnum.FLASHMLA,
                 AttentionBackendEnum.TRITON_MLA,
                 AttentionBackendEnum.FLASHMLA_SPARSE,
             ]
         else:
             return [
-                AttentionBackendEnum.FLASHMLA,
                 AttentionBackendEnum.FLASH_ATTN_MLA,
+                AttentionBackendEnum.FLASHMLA,
                 AttentionBackendEnum.FLASHINFER_MLA,
                 AttentionBackendEnum.TRITON_MLA,
                 AttentionBackendEnum.FLASHMLA_SPARSE,
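
For context on what the reordering above does: priority lists like `_get_backend_priorities` are typically scanned front to back, and the first backend that passes a support check is used. The sketch below illustrates that selection pattern; it is a minimal, hypothetical example (the `AttentionBackendEnum` stub, `MLA_PRIORITIES`, and `select_backend` here are illustrative stand-ins, not vLLM's actual selection code), assuming only that earlier entries win when multiple backends are available.

```python
from enum import Enum, auto
from typing import Callable


class AttentionBackendEnum(Enum):
    """Hypothetical stand-in for vLLM's backend enum (names mirror the diff)."""

    CUTLASS_MLA = auto()
    FLASHINFER_MLA = auto()
    FLASH_ATTN_MLA = auto()
    FLASHMLA = auto()
    TRITON_MLA = auto()
    FLASHMLA_SPARSE = auto()


# Priority order after this patch (non-Blackwell branch): FlashAttention MLA
# is now tried before FlashMLA.
MLA_PRIORITIES = [
    AttentionBackendEnum.FLASH_ATTN_MLA,
    AttentionBackendEnum.FLASHMLA,
    AttentionBackendEnum.FLASHINFER_MLA,
    AttentionBackendEnum.TRITON_MLA,
    AttentionBackendEnum.FLASHMLA_SPARSE,
]


def select_backend(
    priorities: list[AttentionBackendEnum],
    is_supported: Callable[[AttentionBackendEnum], bool],
) -> AttentionBackendEnum:
    """Return the first backend in priority order that passes the support check."""
    for backend in priorities:
        if is_supported(backend):
            return backend
    raise RuntimeError("No supported attention backend found")


if __name__ == "__main__":
    # With both FLASH_ATTN_MLA and FLASHMLA available, the new ordering makes
    # FlashAttention MLA the default choice.
    available = {AttentionBackendEnum.FLASH_ATTN_MLA, AttentionBackendEnum.FLASHMLA}
    print(select_backend(MLA_PRIORITIES, lambda b: b in available))
    # -> AttentionBackendEnum.FLASH_ATTN_MLA
```

Under this model the patch changes no selection logic, only the ordering, so FlashMLA remains the fallback whenever FlashAttention MLA is unavailable.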