diff --git a/vllm/model_executor/layers/batch_invariant.py b/vllm/model_executor/layers/batch_invariant.py index f3ec6b5035889..7368bfd35fec9 100644 --- a/vllm/model_executor/layers/batch_invariant.py +++ b/vllm/model_executor/layers/batch_invariant.py @@ -756,10 +756,10 @@ def override_envs_for_invariance(): "FLEX_ATTENTION", "FLASHINFER", "FLASH_ATTN_MLA", + "FLASHINFER_MLA", "TRITON_MLA", # Not yet supported MLA backends # "FLASHMLA", - # "FLASHINFER_MLA", ] if curr_attn_backend not in supported_backends: warning = (