mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 08:47:05 +08:00
Fix triton compilation issue (#3984)
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
fbb9d9eef4
commit
d04973ad54
@@ -415,7 +415,11 @@ def attn_fwd(
|
|||||||
return
|
return
|
||||||
|
|
||||||
is_mqa = hq != hk
|
is_mqa = hq != hk
|
||||||
off_h_k = off_h_q % hk if is_mqa else off_h_q
|
if is_mqa: # noqa: SIM108
|
||||||
|
off_h_k = off_h_q % hk
|
||||||
|
else:
|
||||||
|
off_h_k = off_h_q
|
||||||
|
|
||||||
n_extra_tokens = 0
|
n_extra_tokens = 0
|
||||||
if seqlen_k < BLOCK_N:
|
if seqlen_k < BLOCK_N:
|
||||||
n_extra_tokens = BLOCK_N - seqlen_k
|
n_extra_tokens = BLOCK_N - seqlen_k
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user