mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-23 04:35:01 +08:00
[Bugfix] Triton FA function takes no keyword arguments (#16902)
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
This commit is contained in:
parent
eb8ef4224d
commit
047797ef90
@@ -1091,7 +1091,14 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
 q,
 k,
 maybe_padded_v,
-**kwargs,
+None, # output
+kwargs["cu_seqlens_q"],
+kwargs["cu_seqlens_k"],
+kwargs["max_seqlen_q"],
+kwargs["max_seqlen_k"],
+kwargs["causal"],
+softmax_scale,
+None, # bias
 )
 if is_vllm_fa:
 attn_out = self.flash_attn_varlen_func(
Loading…
x
Reference in New Issue
Block a user