From 1b7624bf5cfc105a53a5f3f3f5486711ab15d429 Mon Sep 17 00:00:00 2001 From: Serena Date: Thu, 6 Mar 2025 05:28:50 +0800 Subject: [PATCH] [misc] Add FlashMLA as a new option of VLLM_ATTENTION_BACKEND env (#14267) --- vllm/envs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/envs.py b/vllm/envs.py index f6c038967b698..edabd647db2f7 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -321,6 +321,7 @@ environment_variables: dict[str, Callable[[], Any]] = { # - "XFORMERS": use XFormers # - "ROCM_FLASH": use ROCmFlashAttention # - "FLASHINFER": use flashinfer + # - "FLASHMLA": use FlashMLA "VLLM_ATTENTION_BACKEND": lambda: os.getenv("VLLM_ATTENTION_BACKEND", None),