From ebe4d1db3a42096cebcc2b2d289143bc0ef02d3d Mon Sep 17 00:00:00 2001
From: Liang <44948473+soundOfDestiny@users.noreply.github.com>
Date: Mon, 2 Oct 2023 02:35:06 +0800
Subject: [PATCH] Fix boundary check in paged attention kernel (#1241)

---
 csrc/attention/attention_kernels.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csrc/attention/attention_kernels.cu b/csrc/attention/attention_kernels.cu
index 8955b503bdd1..505c63d2efd7 100644
--- a/csrc/attention/attention_kernels.cu
+++ b/csrc/attention/attention_kernels.cu
@@ -269,7 +269,7 @@ __global__ void single_query_cached_kv_attention_kernel(
           // See https://github.com/vllm-project/vllm/issues/641#issuecomment-1682544472
           scalar_t* v_vec_ptr = reinterpret_cast<scalar_t*>(&v_vec);
 #pragma unroll
-          for (int j = 0; j <= V_VEC_SIZE; j++) {
+          for (int j = 0; j < V_VEC_SIZE; j++) {
             v_vec_ptr[j] = token_idx + j < context_len ? v_vec_ptr[j] : zero_value;
           }
         }