mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 07:15:01 +08:00
[Kernel] Add punica dimension for Qwen2 LoRA (#5441)
This commit is contained in:
parent
b12518d3cf
commit
1f5674218f
@ -16,15 +16,20 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, narrow, 512) \
|
f(in_T, out_T, W_T, narrow, 512) \
|
||||||
f(in_T, out_T, W_T, narrow, 640) \
|
f(in_T, out_T, W_T, narrow, 640) \
|
||||||
f(in_T, out_T, W_T, narrow, 768) \
|
f(in_T, out_T, W_T, narrow, 768) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 896) \
|
||||||
f(in_T, out_T, W_T, narrow, 1024) \
|
f(in_T, out_T, W_T, narrow, 1024) \
|
||||||
f(in_T, out_T, W_T, narrow, 1152) \
|
f(in_T, out_T, W_T, narrow, 1152) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 1216) \
|
||||||
f(in_T, out_T, W_T, narrow, 1280) \
|
f(in_T, out_T, W_T, narrow, 1280) \
|
||||||
f(in_T, out_T, W_T, narrow, 1536) \
|
f(in_T, out_T, W_T, narrow, 1536) \
|
||||||
f(in_T, out_T, W_T, narrow, 1664) \
|
f(in_T, out_T, W_T, narrow, 1664) \
|
||||||
f(in_T, out_T, W_T, narrow, 1728) \
|
f(in_T, out_T, W_T, narrow, 1728) \
|
||||||
f(in_T, out_T, W_T, narrow, 1792) \
|
f(in_T, out_T, W_T, narrow, 1792) \
|
||||||
f(in_T, out_T, W_T, narrow, 2048) \
|
f(in_T, out_T, W_T, narrow, 2048) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 2240) \
|
||||||
f(in_T, out_T, W_T, narrow, 2304) \
|
f(in_T, out_T, W_T, narrow, 2304) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 2368) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 2432) \
|
||||||
f(in_T, out_T, W_T, narrow, 2560) \
|
f(in_T, out_T, W_T, narrow, 2560) \
|
||||||
f(in_T, out_T, W_T, narrow, 2752) \
|
f(in_T, out_T, W_T, narrow, 2752) \
|
||||||
f(in_T, out_T, W_T, narrow, 2816) \
|
f(in_T, out_T, W_T, narrow, 2816) \
|
||||||
@ -32,8 +37,12 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, narrow, 3328) \
|
f(in_T, out_T, W_T, narrow, 3328) \
|
||||||
f(in_T, out_T, W_T, narrow, 3456) \
|
f(in_T, out_T, W_T, narrow, 3456) \
|
||||||
f(in_T, out_T, W_T, narrow, 3584) \
|
f(in_T, out_T, W_T, narrow, 3584) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 3712) \
|
||||||
f(in_T, out_T, W_T, narrow, 4096) \
|
f(in_T, out_T, W_T, narrow, 4096) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 4480) \
|
||||||
f(in_T, out_T, W_T, narrow, 4608) \
|
f(in_T, out_T, W_T, narrow, 4608) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 4736) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 4864) \
|
||||||
f(in_T, out_T, W_T, narrow, 5120) \
|
f(in_T, out_T, W_T, narrow, 5120) \
|
||||||
f(in_T, out_T, W_T, narrow, 5504) \
|
f(in_T, out_T, W_T, narrow, 5504) \
|
||||||
f(in_T, out_T, W_T, narrow, 5632) \
|
f(in_T, out_T, W_T, narrow, 5632) \
|
||||||
@ -43,8 +52,11 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, narrow, 6848) \
|
f(in_T, out_T, W_T, narrow, 6848) \
|
||||||
f(in_T, out_T, W_T, narrow, 6912) \
|
f(in_T, out_T, W_T, narrow, 6912) \
|
||||||
f(in_T, out_T, W_T, narrow, 7168) \
|
f(in_T, out_T, W_T, narrow, 7168) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 7424) \
|
||||||
f(in_T, out_T, W_T, narrow, 8192) \
|
f(in_T, out_T, W_T, narrow, 8192) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 8960) \
|
||||||
f(in_T, out_T, W_T, narrow, 9216) \
|
f(in_T, out_T, W_T, narrow, 9216) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 9472) \
|
||||||
f(in_T, out_T, W_T, narrow, 10240) \
|
f(in_T, out_T, W_T, narrow, 10240) \
|
||||||
f(in_T, out_T, W_T, narrow, 11008) \
|
f(in_T, out_T, W_T, narrow, 11008) \
|
||||||
f(in_T, out_T, W_T, narrow, 11264) \
|
f(in_T, out_T, W_T, narrow, 11264) \
|
||||||
@ -52,8 +64,11 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, narrow, 13696) \
|
f(in_T, out_T, W_T, narrow, 13696) \
|
||||||
f(in_T, out_T, W_T, narrow, 13824) \
|
f(in_T, out_T, W_T, narrow, 13824) \
|
||||||
f(in_T, out_T, W_T, narrow, 14336) \
|
f(in_T, out_T, W_T, narrow, 14336) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 14784) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 14848) \
|
||||||
f(in_T, out_T, W_T, narrow, 15360) \
|
f(in_T, out_T, W_T, narrow, 15360) \
|
||||||
f(in_T, out_T, W_T, narrow, 16384) \
|
f(in_T, out_T, W_T, narrow, 16384) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 18944) \
|
||||||
f(in_T, out_T, W_T, narrow, 20480) \
|
f(in_T, out_T, W_T, narrow, 20480) \
|
||||||
f(in_T, out_T, W_T, narrow, 22016) \
|
f(in_T, out_T, W_T, narrow, 22016) \
|
||||||
f(in_T, out_T, W_T, narrow, 22528) \
|
f(in_T, out_T, W_T, narrow, 22528) \
|
||||||
@ -61,6 +76,8 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, narrow, 27392) \
|
f(in_T, out_T, W_T, narrow, 27392) \
|
||||||
f(in_T, out_T, W_T, narrow, 27648) \
|
f(in_T, out_T, W_T, narrow, 27648) \
|
||||||
f(in_T, out_T, W_T, narrow, 28672) \
|
f(in_T, out_T, W_T, narrow, 28672) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 29568) \
|
||||||
|
f(in_T, out_T, W_T, narrow, 29696) \
|
||||||
f(in_T, out_T, W_T, narrow, 32000) \
|
f(in_T, out_T, W_T, narrow, 32000) \
|
||||||
f(in_T, out_T, W_T, narrow, 32256) \
|
f(in_T, out_T, W_T, narrow, 32256) \
|
||||||
f(in_T, out_T, W_T, narrow, 32512) \
|
f(in_T, out_T, W_T, narrow, 32512) \
|
||||||
@ -95,15 +112,20 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, 512, narrow) \
|
f(in_T, out_T, W_T, 512, narrow) \
|
||||||
f(in_T, out_T, W_T, 640, narrow) \
|
f(in_T, out_T, W_T, 640, narrow) \
|
||||||
f(in_T, out_T, W_T, 768, narrow) \
|
f(in_T, out_T, W_T, 768, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 896, narrow) \
|
||||||
f(in_T, out_T, W_T, 1024, narrow) \
|
f(in_T, out_T, W_T, 1024, narrow) \
|
||||||
f(in_T, out_T, W_T, 1152, narrow) \
|
f(in_T, out_T, W_T, 1152, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 1216, narrow) \
|
||||||
f(in_T, out_T, W_T, 1280, narrow) \
|
f(in_T, out_T, W_T, 1280, narrow) \
|
||||||
f(in_T, out_T, W_T, 1536, narrow) \
|
f(in_T, out_T, W_T, 1536, narrow) \
|
||||||
f(in_T, out_T, W_T, 1664, narrow) \
|
f(in_T, out_T, W_T, 1664, narrow) \
|
||||||
f(in_T, out_T, W_T, 1728, narrow) \
|
f(in_T, out_T, W_T, 1728, narrow) \
|
||||||
f(in_T, out_T, W_T, 1792, narrow) \
|
f(in_T, out_T, W_T, 1792, narrow) \
|
||||||
f(in_T, out_T, W_T, 2048, narrow) \
|
f(in_T, out_T, W_T, 2048, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 2240, narrow) \
|
||||||
f(in_T, out_T, W_T, 2304, narrow) \
|
f(in_T, out_T, W_T, 2304, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 2368, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 2432, narrow) \
|
||||||
f(in_T, out_T, W_T, 2560, narrow) \
|
f(in_T, out_T, W_T, 2560, narrow) \
|
||||||
f(in_T, out_T, W_T, 2752, narrow) \
|
f(in_T, out_T, W_T, 2752, narrow) \
|
||||||
f(in_T, out_T, W_T, 2816, narrow) \
|
f(in_T, out_T, W_T, 2816, narrow) \
|
||||||
@ -111,8 +133,12 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, 3328, narrow) \
|
f(in_T, out_T, W_T, 3328, narrow) \
|
||||||
f(in_T, out_T, W_T, 3456, narrow) \
|
f(in_T, out_T, W_T, 3456, narrow) \
|
||||||
f(in_T, out_T, W_T, 3584, narrow) \
|
f(in_T, out_T, W_T, 3584, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 3712, narrow) \
|
||||||
f(in_T, out_T, W_T, 4096, narrow) \
|
f(in_T, out_T, W_T, 4096, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 4480, narrow) \
|
||||||
f(in_T, out_T, W_T, 4608, narrow) \
|
f(in_T, out_T, W_T, 4608, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 4736, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 4864, narrow) \
|
||||||
f(in_T, out_T, W_T, 5120, narrow) \
|
f(in_T, out_T, W_T, 5120, narrow) \
|
||||||
f(in_T, out_T, W_T, 5504, narrow) \
|
f(in_T, out_T, W_T, 5504, narrow) \
|
||||||
f(in_T, out_T, W_T, 5632, narrow) \
|
f(in_T, out_T, W_T, 5632, narrow) \
|
||||||
@ -122,8 +148,11 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, 6848, narrow) \
|
f(in_T, out_T, W_T, 6848, narrow) \
|
||||||
f(in_T, out_T, W_T, 6912, narrow) \
|
f(in_T, out_T, W_T, 6912, narrow) \
|
||||||
f(in_T, out_T, W_T, 7168, narrow) \
|
f(in_T, out_T, W_T, 7168, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 7424, narrow) \
|
||||||
f(in_T, out_T, W_T, 8192, narrow) \
|
f(in_T, out_T, W_T, 8192, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 8960, narrow) \
|
||||||
f(in_T, out_T, W_T, 9216, narrow) \
|
f(in_T, out_T, W_T, 9216, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 9472, narrow) \
|
||||||
f(in_T, out_T, W_T, 10240, narrow) \
|
f(in_T, out_T, W_T, 10240, narrow) \
|
||||||
f(in_T, out_T, W_T, 11008, narrow) \
|
f(in_T, out_T, W_T, 11008, narrow) \
|
||||||
f(in_T, out_T, W_T, 11264, narrow) \
|
f(in_T, out_T, W_T, 11264, narrow) \
|
||||||
@ -131,8 +160,11 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, 13696, narrow) \
|
f(in_T, out_T, W_T, 13696, narrow) \
|
||||||
f(in_T, out_T, W_T, 13824, narrow) \
|
f(in_T, out_T, W_T, 13824, narrow) \
|
||||||
f(in_T, out_T, W_T, 14336, narrow) \
|
f(in_T, out_T, W_T, 14336, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 14784, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 14848, narrow) \
|
||||||
f(in_T, out_T, W_T, 15360, narrow) \
|
f(in_T, out_T, W_T, 15360, narrow) \
|
||||||
f(in_T, out_T, W_T, 16384, narrow) \
|
f(in_T, out_T, W_T, 16384, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 18944, narrow) \
|
||||||
f(in_T, out_T, W_T, 20480, narrow) \
|
f(in_T, out_T, W_T, 20480, narrow) \
|
||||||
f(in_T, out_T, W_T, 22016, narrow) \
|
f(in_T, out_T, W_T, 22016, narrow) \
|
||||||
f(in_T, out_T, W_T, 22528, narrow) \
|
f(in_T, out_T, W_T, 22528, narrow) \
|
||||||
@ -140,6 +172,8 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
|
|||||||
f(in_T, out_T, W_T, 27392, narrow) \
|
f(in_T, out_T, W_T, 27392, narrow) \
|
||||||
f(in_T, out_T, W_T, 27648, narrow) \
|
f(in_T, out_T, W_T, 27648, narrow) \
|
||||||
f(in_T, out_T, W_T, 28672, narrow) \
|
f(in_T, out_T, W_T, 28672, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 29568, narrow) \
|
||||||
|
f(in_T, out_T, W_T, 29696, narrow) \
|
||||||
f(in_T, out_T, W_T, 32000, narrow) \
|
f(in_T, out_T, W_T, 32000, narrow) \
|
||||||
f(in_T, out_T, W_T, 32256, narrow) \
|
f(in_T, out_T, W_T, 32256, narrow) \
|
||||||
f(in_T, out_T, W_T, 32512, narrow) \
|
f(in_T, out_T, W_T, 32512, narrow) \
|
||||||
|
|||||||
@ -49,21 +49,30 @@ H1 = H2 = [
|
|||||||
128,
|
128,
|
||||||
256,
|
256,
|
||||||
512,
|
512,
|
||||||
|
896,
|
||||||
1024,
|
1024,
|
||||||
1152,
|
1152,
|
||||||
|
1216,
|
||||||
1280,
|
1280,
|
||||||
1536,
|
1536,
|
||||||
1664,
|
1664,
|
||||||
2048,
|
2048,
|
||||||
|
2240,
|
||||||
2304,
|
2304,
|
||||||
|
2368,
|
||||||
|
2432,
|
||||||
2560,
|
2560,
|
||||||
2752,
|
2752,
|
||||||
3072,
|
3072,
|
||||||
3328,
|
3328,
|
||||||
3456,
|
3456,
|
||||||
3584,
|
3584,
|
||||||
|
3712,
|
||||||
4096,
|
4096,
|
||||||
|
4480,
|
||||||
4608,
|
4608,
|
||||||
|
4736,
|
||||||
|
4864,
|
||||||
5120,
|
5120,
|
||||||
5504,
|
5504,
|
||||||
5632,
|
5632,
|
||||||
@ -73,19 +82,27 @@ H1 = H2 = [
|
|||||||
6848,
|
6848,
|
||||||
6912,
|
6912,
|
||||||
7168,
|
7168,
|
||||||
|
7424,
|
||||||
8192,
|
8192,
|
||||||
|
8960,
|
||||||
9216,
|
9216,
|
||||||
|
9472,
|
||||||
10240,
|
10240,
|
||||||
11008,
|
11008,
|
||||||
11264,
|
11264,
|
||||||
13824,
|
13824,
|
||||||
14336,
|
14336,
|
||||||
|
14784,
|
||||||
|
14848,
|
||||||
15360,
|
15360,
|
||||||
|
18944,
|
||||||
22016,
|
22016,
|
||||||
22528,
|
22528,
|
||||||
24576,
|
24576,
|
||||||
27392,
|
27392,
|
||||||
27648,
|
27648,
|
||||||
|
29568,
|
||||||
|
29696,
|
||||||
32000,
|
32000,
|
||||||
32256,
|
32256,
|
||||||
32512,
|
32512,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user