From 684f2545851ee0ee49be9a80545ed497324f1a96 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni
Date: Tue, 11 Nov 2025 11:13:51 -0600
Subject: [PATCH] Prefer FlashAttention MLA as default over FlashMLA (#27363)

Signed-off-by: Matthew Bonanni
---
 vllm/platforms/cuda.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 43daf5e75b665..22c6dde754d01 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -55,15 +55,15 @@ def _get_backend_priorities(
             return [
                 AttentionBackendEnum.CUTLASS_MLA,
                 AttentionBackendEnum.FLASHINFER_MLA,
-                AttentionBackendEnum.FLASHMLA,
                 AttentionBackendEnum.FLASH_ATTN_MLA,
+                AttentionBackendEnum.FLASHMLA,
                 AttentionBackendEnum.TRITON_MLA,
                 AttentionBackendEnum.FLASHMLA_SPARSE,
             ]
         else:
             return [
-                AttentionBackendEnum.FLASHMLA,
                 AttentionBackendEnum.FLASH_ATTN_MLA,
+                AttentionBackendEnum.FLASHMLA,
                 AttentionBackendEnum.FLASHINFER_MLA,
                 AttentionBackendEnum.TRITON_MLA,
                 AttentionBackendEnum.FLASHMLA_SPARSE,
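
For context on what the reordering above does: priority lists like `_get_backend_priorities` are typically scanned front to back, and the first backend that passes a support check is used. The sketch below illustrates that selection pattern; it is a minimal, hypothetical example (the `AttentionBackendEnum` stub, `MLA_PRIORITIES`, and `select_backend` here are illustrative stand-ins, not vLLM's actual selection code), assuming only that earlier entries win when multiple backends are available.

```python
from enum import Enum, auto
from typing import Callable


class AttentionBackendEnum(Enum):
    """Hypothetical stand-in for vLLM's backend enum (names mirror the diff)."""

    CUTLASS_MLA = auto()
    FLASHINFER_MLA = auto()
    FLASH_ATTN_MLA = auto()
    FLASHMLA = auto()
    TRITON_MLA = auto()
    FLASHMLA_SPARSE = auto()


# Priority order after this patch (non-Blackwell branch): FlashAttention MLA
# is now tried before FlashMLA.
MLA_PRIORITIES = [
    AttentionBackendEnum.FLASH_ATTN_MLA,
    AttentionBackendEnum.FLASHMLA,
    AttentionBackendEnum.FLASHINFER_MLA,
    AttentionBackendEnum.TRITON_MLA,
    AttentionBackendEnum.FLASHMLA_SPARSE,
]


def select_backend(
    priorities: list[AttentionBackendEnum],
    is_supported: Callable[[AttentionBackendEnum], bool],
) -> AttentionBackendEnum:
    """Return the first backend in priority order that passes the support check."""
    for backend in priorities:
        if is_supported(backend):
            return backend
    raise RuntimeError("No supported attention backend found")


if __name__ == "__main__":
    # With both FLASH_ATTN_MLA and FLASHMLA available, the new ordering makes
    # FlashAttention MLA the default choice.
    available = {AttentionBackendEnum.FLASH_ATTN_MLA, AttentionBackendEnum.FLASHMLA}
    print(select_backend(MLA_PRIORITIES, lambda b: b in available))
    # -> AttentionBackendEnum.FLASH_ATTN_MLA
```

Under this model the patch changes no selection logic, only the ordering, so FlashMLA remains the fallback whenever FlashAttention MLA is unavailable.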