Prefer FlashAttention MLA as default over FlashMLA (#27363)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
Matthew Bonanni 2025-11-11 11:13:51 -06:00 committed by GitHub
parent e553424919
commit 684f254585
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -55,15 +55,15 @@ def _get_backend_priorities(
return [ return [
AttentionBackendEnum.CUTLASS_MLA, AttentionBackendEnum.CUTLASS_MLA,
AttentionBackendEnum.FLASHINFER_MLA, AttentionBackendEnum.FLASHINFER_MLA,
AttentionBackendEnum.FLASHMLA,
AttentionBackendEnum.FLASH_ATTN_MLA, AttentionBackendEnum.FLASH_ATTN_MLA,
AttentionBackendEnum.FLASHMLA,
AttentionBackendEnum.TRITON_MLA, AttentionBackendEnum.TRITON_MLA,
AttentionBackendEnum.FLASHMLA_SPARSE, AttentionBackendEnum.FLASHMLA_SPARSE,
] ]
else: else:
return [ return [
AttentionBackendEnum.FLASHMLA,
AttentionBackendEnum.FLASH_ATTN_MLA, AttentionBackendEnum.FLASH_ATTN_MLA,
AttentionBackendEnum.FLASHMLA,
AttentionBackendEnum.FLASHINFER_MLA, AttentionBackendEnum.FLASHINFER_MLA,
AttentionBackendEnum.TRITON_MLA, AttentionBackendEnum.TRITON_MLA,
AttentionBackendEnum.FLASHMLA_SPARSE, AttentionBackendEnum.FLASHMLA_SPARSE,