From 047797ef904fd77b13af3a972e92ef6de37a36db Mon Sep 17 00:00:00 2001
From: vllmellm
Date: Wed, 23 Apr 2025 12:35:24 +0800
Subject: [PATCH] [Bugfix] Triton FA function takes no keyword arguments
 (#16902)

Signed-off-by: vllmellm

---
 vllm/attention/backends/mla/common.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/vllm/attention/backends/mla/common.py b/vllm/attention/backends/mla/common.py
index 2517a59718382..a3dec0dbda9f8 100644
--- a/vllm/attention/backends/mla/common.py
+++ b/vllm/attention/backends/mla/common.py
@@ -1091,7 +1091,14 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
                 q,
                 k,
                 maybe_padded_v,
-                **kwargs,
+                None,  # output
+                kwargs["cu_seqlens_q"],
+                kwargs["cu_seqlens_k"],
+                kwargs["max_seqlen_q"],
+                kwargs["max_seqlen_k"],
+                kwargs["causal"],
+                softmax_scale,
+                None,  # bias
             )
         if is_vllm_fa:
             attn_out = self.flash_attn_varlen_func(
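
The failure mode this patch works around, assuming the Triton flash-attention
entry point is exposed through torch.autograd.Function.apply (as the ROCm
Triton path in vLLM is), is that Function.apply accepts only positional
arguments, so forwarding **kwargs raises a TypeError at call time. Below is a
minimal sketch of the bug and the positional-expansion fix; TritonFALike is a
hypothetical stand-in for the real kernel wrapper, not vLLM code.

    import torch

    class TritonFALike(torch.autograd.Function):
        """Stand-in mirroring the positional argument order in the patch."""

        @staticmethod
        def forward(ctx, q, k, v, o, cu_seqlens_q, cu_seqlens_k,
                    max_seqlen_q, max_seqlen_k, causal, sm_scale, bias):
            # The real kernel launches a Triton grid here; return q as a
            # placeholder so the sketch runs without a GPU.
            return q

    triton_fa_func = TritonFALike.apply
    q = k = v = torch.randn(8, 4, 64)

    try:
        # Fails: autograd.Function.apply takes no keyword arguments.
        triton_fa_func(q, k, v, None, cu_seqlens_q=None, cu_seqlens_k=None,
                       max_seqlen_q=8, max_seqlen_k=8, causal=True,
                       sm_scale=1.0, bias=None)
    except TypeError as exc:
        print(f"keyword call rejected: {exc}")

    # Works: expand every argument positionally, as the patch now does.
    out = triton_fa_func(q, k, v, None, None, None, 8, 8, True, 1.0, None)

This is why the patched call site reads each value out of kwargs and passes it
in the exact positional order the Triton function expects, with None supplied
for the preallocated-output and bias slots.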