From 03b1e6fdbd6869b115fda7e2d3d97430f8d47bbf Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Sat, 21 Dec 2024 17:28:21 -0800
Subject: [PATCH] Minor

Signed-off-by: Woosuk Kwon
---
 csrc/prepare_inputs/copy_subranges.cu | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/csrc/prepare_inputs/copy_subranges.cu b/csrc/prepare_inputs/copy_subranges.cu
index 48aa4b841321f..082b37000c65b 100644
--- a/csrc/prepare_inputs/copy_subranges.cu
+++ b/csrc/prepare_inputs/copy_subranges.cu
@@ -55,7 +55,16 @@ void copy_subranges(torch::Tensor& matrix_src, torch::Tensor& matrix_diff,
 
   // One thread block per row.
   int blocks = n;
-  int threads = 1024;
+  int threads;
+  if (blocks < 128) {
+    threads = 1024;
+  } else if (blocks < 256) {
+    threads = 512;
+  } else if (blocks < 512) {
+    threads = 256;
+  } else {
+    threads = 128;
+  }
   const at::cuda::OptionalCUDAGuard device_guard(device_of(matrix_tgt));
   const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
   vllm::copy_subranges_kernel<<<blocks, threads, 0, stream>>>(