From eea22a56ab08fb018e8fc51d1bf988cb85f37115 Mon Sep 17 00:00:00 2001
From: Shiyan Deng
Date: Sun, 11 May 2025 00:53:31 -0700
Subject: [PATCH] fix amd triton mla path (#17871)

---
 vllm/attention/backends/mla/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/backends/mla/common.py b/vllm/attention/backends/mla/common.py
index 363aa08ef0030..d48462684906a 100644
--- a/vllm/attention/backends/mla/common.py
+++ b/vllm/attention/backends/mla/common.py
@@ -1063,7 +1063,7 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
                 softmax_scale,
                 None,  # bias
             )
-        if is_vllm_fa:
+        elif is_vllm_fa:
             attn_out = self.flash_attn_varlen_func(
                 q=q,
                 k=k,
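
Note (reviewer commentary, not part of the patch): the hunk sits just after what appears to be the Triton flash-attention call on the AMD/ROCm path (the `softmax_scale` and `None,  # bias` arguments are its tail). With a plain `if`, the vLLM flash-attention branch would also run whenever `is_vllm_fa` is true, re-computing attention and overwriting the `attn_out` produced by the Triton path; changing it to `elif` makes the two backends mutually exclusive. Below is a minimal sketch of that control-flow bug; the names (`dispatch_attn`, `use_triton_fa`, the string results) are hypothetical stand-ins for vLLM's real dispatch, not its actual API.

    # Hypothetical stand-in for the backend dispatch in MLACommonImpl;
    # names and return values are illustrative only.
    def dispatch_attn(use_triton_fa: bool, is_vllm_fa: bool) -> str:
        attn_out = "none"
        if use_triton_fa:
            attn_out = "triton"    # AMD Triton MLA path
        if is_vllm_fa:             # BUG: plain 'if' also fires after Triton
            attn_out = "vllm_fa"   # clobbers the Triton result
        return attn_out

    def dispatch_attn_fixed(use_triton_fa: bool, is_vllm_fa: bool) -> str:
        attn_out = "none"
        if use_triton_fa:
            attn_out = "triton"
        elif is_vllm_fa:           # the patch: branches are now exclusive
            attn_out = "vllm_fa"
        return attn_out

    # Assuming both flags can be true at once on ROCm (which the patch
    # implies), only the fixed version keeps the Triton result:
    assert dispatch_attn(True, True) == "vllm_fa"
    assert dispatch_attn_fixed(True, True) == "triton"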