mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 14:25:46 +08:00
Prefix Caching: fix T4 Triton error (#2517)
This commit is contained in:
parent
5255d99dc5
commit
64da65b322
@@ -618,7 +618,9 @@ if triton.__version__ >= "2.1.0":
|
|||||||
b_ctx_len,
|
b_ctx_len,
|
||||||
max_input_len,
|
max_input_len,
|
||||||
alibi_slopes=None):
|
alibi_slopes=None):
|
||||||
BLOCK = 128
|
|
||||||
|
cap = torch.cuda.get_device_capability()
|
||||||
|
BLOCK = 128 if cap[0] >= 8 else 64
|
||||||
# shape constraints
|
# shape constraints
|
||||||
Lq, Lk, Lv = q.shape[-1], k.shape[-1], v.shape[-1]
|
Lq, Lk, Lv = q.shape[-1], k.shape[-1], v.shape[-1]
|
||||||
assert Lq == Lk and Lk == Lv
|
assert Lq == Lk and Lk == Lv
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user