mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 01:35:22 +08:00
[Kernel] Add punica dimension for Qwen1.5-32B LoRA (#4850)
Co-authored-by: Silencio <silencio@adsl-99-6-187-6.dsl.irvnca.sbcglobal.net>
This commit is contained in:
parent
10fa9eea21
commit
8435b207af
@ -53,6 +53,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, narrow, 22016) \
|
f(in_T, out_T, W_T, narrow, 22016) \
|
||||||
f(in_T, out_T, W_T, narrow, 24576) \
|
f(in_T, out_T, W_T, narrow, 24576) \
|
||||||
f(in_T, out_T, W_T, narrow, 27392) \
|
f(in_T, out_T, W_T, narrow, 27392) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 27648) \
|
||||||
f(in_T, out_T, W_T, narrow, 28672) \
|
f(in_T, out_T, W_T, narrow, 28672) \
|
||||||
f(in_T, out_T, W_T, narrow, 32000) \
|
f(in_T, out_T, W_T, narrow, 32000) \
|
||||||
f(in_T, out_T, W_T, narrow, 32256) \
|
f(in_T, out_T, W_T, narrow, 32256) \
|
||||||
@ -121,6 +122,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, 22016, narrow) \
|
f(in_T, out_T, W_T, 22016, narrow) \
|
||||||
f(in_T, out_T, W_T, 24576, narrow) \
|
f(in_T, out_T, W_T, 24576, narrow) \
|
||||||
f(in_T, out_T, W_T, 27392, narrow) \
|
f(in_T, out_T, W_T, 27392, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 27648, narrow) \
|
||||||
f(in_T, out_T, W_T, 28672, narrow) \
|
f(in_T, out_T, W_T, 28672, narrow) \
|
||||||
f(in_T, out_T, W_T, 32000, narrow) \
|
f(in_T, out_T, W_T, 32000, narrow) \
|
||||||
f(in_T, out_T, W_T, 32256, narrow) \
|
f(in_T, out_T, W_T, 32256, narrow) \
|
||||||
|
|||||||
@ -79,6 +79,7 @@ H1 = H2 = [
|
|||||||
22016,
|
22016,
|
||||||
24576,
|
24576,
|
||||||
27392,
|
27392,
|
||||||
|
27648,
|
||||||
32000,
|
32000,
|
||||||
32256,
|
32256,
|
||||||
32512,
|
32512,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user