diff --git a/csrc/quantization/gguf/ggml-common.h b/csrc/quantization/gguf/ggml-common.h
index 99a7ea0fb277e..6bef5db3ccf15 100644
--- a/csrc/quantization/gguf/ggml-common.h
+++ b/csrc/quantization/gguf/ggml-common.h
@@ -1090,6 +1090,11 @@ __device__ __forceinline__ c10::BFloat16 convert_from_half(half v
 #endif // defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
 }
 
+template<>  // Explicit specialization of convert_from_half returning float (primary template is outside this hunk -- confirm its signature there).
+__device__ __forceinline__ float convert_from_half(half val) {  // Device-only, force-inlined; takes one half value by value.
+    return __half2float(val);  // Result is whatever the __half2float intrinsic yields for val.
+}
+
 #if defined(USE_ROCM)
 
 #ifndef __has_builtin