Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-14 05:55:01 +08:00)
Prefer FlashAttention MLA as default over FlashMLA (#27363)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
parent e553424919
commit 684f254585
@@ -55,15 +55,15 @@ def _get_backend_priorities(
     return [
         AttentionBackendEnum.CUTLASS_MLA,
         AttentionBackendEnum.FLASHINFER_MLA,
-        AttentionBackendEnum.FLASHMLA,
         AttentionBackendEnum.FLASH_ATTN_MLA,
+        AttentionBackendEnum.FLASHMLA,
         AttentionBackendEnum.TRITON_MLA,
         AttentionBackendEnum.FLASHMLA_SPARSE,
     ]
 else:
     return [
-        AttentionBackendEnum.FLASHMLA,
         AttentionBackendEnum.FLASH_ATTN_MLA,
+        AttentionBackendEnum.FLASHMLA,
         AttentionBackendEnum.FLASHINFER_MLA,
         AttentionBackendEnum.TRITON_MLA,
         AttentionBackendEnum.FLASHMLA_SPARSE,
Loading…
x
Reference in New Issue
Block a user