Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-27 18:17:28 +08:00)
Fix triton compilation issue (#3984)
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Parent: fbb9d9eef4
Commit: d04973ad54
@@ -415,7 +415,11 @@ def attn_fwd(
             return
 
     is_mqa = hq != hk
-    off_h_k = off_h_q % hk if is_mqa else off_h_q
+    if is_mqa:  # noqa: SIM108
+        off_h_k = off_h_q % hk
+    else:
+        off_h_k = off_h_q
+
     n_extra_tokens = 0
     if seqlen_k < BLOCK_N:
         n_extra_tokens = BLOCK_N - seqlen_k
Loading…
x
Reference in New Issue
Block a user