[tiny] Remove unsupported TRITON_MLA backend from batch invariance (#28832)

Signed-off-by: Bram Wasti <bwasti@meta.com>
Signed-off-by: Bram Wasti <bwasti@fb.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Authored by Bram Wasti on 2025-11-22 08:00:50 -05:00; committed by GitHub.
parent 2d4978a57e
commit 5f7209a793

@@ -805,11 +805,11 @@ def override_envs_for_invariance():
     "FLASH_ATTN",  # best supported backend
     "FLASHINFER",
     "FLASH_ATTN_MLA",
-    "TRITON_MLA",
     # Not yet supported MLA backends
     # "FLASHMLA",
     # "FLEX_ATTENTION",  # IMA issue even if we disable batch invariance
     # "FLASHINFER_MLA", https://github.com/vllm-project/vllm/pull/28967
+    # "TRITON_MLA",
 ]
 if curr_attn_backend not in supported_backends:
     warning = (
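For context, the check this diff touches can be sketched as follows. This is a simplified, hypothetical reconstruction, not vLLM's actual implementation: the backend names mirror the diff, but the function name `check_batch_invariance_backend`, the warning text, and the default fallback are assumptions for illustration.

```python
import os
import warnings

# Backends known to produce batch-invariant results (mirrors the diff;
# TRITON_MLA was moved to the not-yet-supported, commented-out set).
SUPPORTED_BACKENDS = [
    "FLASH_ATTN",      # best supported backend
    "FLASHINFER",
    "FLASH_ATTN_MLA",
    # Not yet supported MLA backends (kept commented out in the real list):
    # "FLASHMLA", "FLEX_ATTENTION", "FLASHINFER_MLA", "TRITON_MLA"
]

def check_batch_invariance_backend(curr_attn_backend: str) -> bool:
    """Warn and return False if the backend is not batch-invariant."""
    if curr_attn_backend not in SUPPORTED_BACKENDS:
        warnings.warn(
            f"Attention backend {curr_attn_backend!r} is not supported "
            "for batch invariance; outputs may vary with batch size."
        )
        return False
    return True

# Example: read the backend the same way vLLM's env override would.
backend = os.environ.get("VLLM_ATTENTION_BACKEND", "FLASH_ATTN")
check_batch_invariance_backend(backend)
```

With this change, setting `VLLM_ATTENTION_BACKEND=TRITON_MLA` would now fall into the warning path rather than being treated as batch-invariant.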