mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-29 01:07:11 +08:00
Minor
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
8a4180c8b6
commit
03b1e6fdbd
@@ -55,7 +55,16 @@ void copy_subranges(torch::Tensor& matrix_src, torch::Tensor& matrix_diff,
 
   // One thread block per row.
   int blocks = n;
-  int threads = 1024;
+  int threads;
+  if (blocks < 128) {
+    threads = 1024;
+  } else if (blocks < 256) {
+    threads = 512;
+  } else if (blocks < 512) {
+    threads = 256;
+  } else {
+    threads = 128;
+  }
   const at::cuda::OptionalCUDAGuard device_guard(device_of(matrix_tgt));
   const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
   vllm::copy_subranges_kernel<<<blocks, threads, 0, stream>>>(
Loading…
x
Reference in New Issue
Block a user