From 1cdc88614a1cebd7b277be7a19fd7f36746ac269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20O=C5=BC=C3=B3g?= <58388001+SzymonOzog@users.noreply.github.com> Date: Fri, 21 Feb 2025 07:06:54 +0100 Subject: [PATCH] Missing comment explaining VDR variable in GGUF kernels (#13290) --- csrc/quantization/gguf/vecdotq.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/csrc/quantization/gguf/vecdotq.cuh b/csrc/quantization/gguf/vecdotq.cuh index e00422637c65b..d0d4c74ed379b 100644 --- a/csrc/quantization/gguf/vecdotq.cuh +++ b/csrc/quantization/gguf/vecdotq.cuh @@ -37,6 +37,8 @@ static __device__ __forceinline__ int get_int_from_uint8_aligned(const uint8_t * return *((const int *) (x8 + sizeof(int) * i32)); // assume at least 4 byte alignment } +// VDR = vec dot ratio, how many contiguous integers each thread processes when the vec dot kernel is called +// MMVQ = mul_mat_vec_q, MMQ = mul_mat_q #define VDR_Q4_0_Q8_1_MMVQ 2 #define VDR_Q4_0_Q8_1_MMQ 4