mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 07:04:53 +08:00
Prefix Caching- fix t4 triton error (#2517)
This commit is contained in:
parent
5255d99dc5
commit
64da65b322
@@ -618,7 +618,9 @@ if triton.__version__ >= "2.1.0":
|
||||
b_ctx_len,
|
||||
max_input_len,
|
||||
alibi_slopes=None):
|
||||
BLOCK = 128
|
||||
|
||||
cap = torch.cuda.get_device_capability()
|
||||
BLOCK = 128 if cap[0] >= 8 else 64
|
||||
# shape constraints
|
||||
Lq, Lk, Lv = q.shape[-1], k.shape[-1], v.shape[-1]
|
||||
assert Lq == Lk and Lk == Lv
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user