diff --git a/vllm/attention/backends/mla/common.py b/vllm/attention/backends/mla/common.py
index 363aa08ef0030..d48462684906a 100644
--- a/vllm/attention/backends/mla/common.py
+++ b/vllm/attention/backends/mla/common.py
@@ -1063,7 +1063,7 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
                 softmax_scale,
                 None,  # bias
             )
-        if is_vllm_fa:
+        elif is_vllm_fa:
             attn_out = self.flash_attn_varlen_func(
                 q=q,
                 k=k,
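
The hunk turns the second attention dispatch into an `elif`, so the vLLM flash-attn path only runs when the preceding branch (whose call the context lines close) did not already produce `attn_out`; with the old plain `if`, both branches could execute and the second would overwrite the first result. Below is a minimal sketch of that control-flow fix, not the actual vLLM code: the condition name `use_first_backend` and the string results are hypothetical stand-ins for the branches surrounding this hunk.

    # Minimal sketch of the if -> elif fix, with hypothetical branch names.
    def dispatch_attention(use_first_backend: bool, is_vllm_fa: bool) -> str:
        attn_out = "no_backend_selected"
        if use_first_backend:
            attn_out = "first_backend_result"      # output of the branch preceding the hunk
        elif is_vllm_fa:                           # was `if`: it would have clobbered the result above
            attn_out = "vllm_flash_attn_result"    # only runs when the first branch did not
        return attn_out

    assert dispatch_attention(True, True) == "first_backend_result"
    assert dispatch_attention(False, True) == "vllm_flash_attn_result"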