mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-11 06:17:03 +08:00
[tiny] Remove unsupported TRITON_MLA backend from batch invariance (#28832)
Signed-off-by: Bram Wasti <bwasti@meta.com>
Signed-off-by: Bram Wasti <bwasti@fb.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
This commit is contained in:
parent
2d4978a57e
commit
5f7209a793
@@ -805,11 +805,11 @@ def override_envs_for_invariance():
|
||||
"FLASH_ATTN", # best supported backend
|
||||
"FLASHINFER",
|
||||
"FLASH_ATTN_MLA",
|
||||
"TRITON_MLA",
|
||||
# Not yet supported MLA backends
|
||||
# "FLASHMLA",
|
||||
# "FLEX_ATTENTION", # IMA issue even if we disable batch invariance
|
||||
# "FLASHINFER_MLA", https://github.com/vllm-project/vllm/pull/28967
|
||||
+ # "TRITON_MLA",
|
||||
]
|
||||
if curr_attn_backend not in supported_backends:
|
||||
warning = (
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user