Prefer FlashAttention MLA as default over FlashMLA (#27363)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
Matthew Bonanni 2025-11-11 11:13:51 -06:00 committed by GitHub
parent e553424919
commit 684f254585
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -55,15 +55,15 @@ def _get_backend_priorities(
return [
AttentionBackendEnum.CUTLASS_MLA,
AttentionBackendEnum.FLASHINFER_MLA,
AttentionBackendEnum.FLASHMLA,
AttentionBackendEnum.FLASH_ATTN_MLA,
AttentionBackendEnum.FLASHMLA,
AttentionBackendEnum.TRITON_MLA,
AttentionBackendEnum.FLASHMLA_SPARSE,
]
else:
return [
AttentionBackendEnum.FLASHMLA,
AttentionBackendEnum.FLASH_ATTN_MLA,
AttentionBackendEnum.FLASHMLA,
AttentionBackendEnum.FLASHINFER_MLA,
AttentionBackendEnum.TRITON_MLA,
AttentionBackendEnum.FLASHMLA_SPARSE,