From 113e0d5d1bc2e47efb44ce93bd958bcd06de1b0a Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 15 Feb 2024 16:02:24 +0200
Subject: [PATCH] cuda : fix performance (pow -> powf)

---
 ggml-cuda.cu | 6 +++---
 llama.cpp    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 7285d6de5..79487391c 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -5976,12 +5976,12 @@ static __global__ void soft_max_f32(const float * x, const float * mask, const f
         const uint32_t n_head_kv = gridDim.x/nrows_y;
         const uint32_t n_head_log2 = 1u << (uint32_t) floor(log2((float) n_head_kv));
 
-        const float m0 = pow(2.0f, -(max_bias ) / n_head_log2);
-        const float m1 = pow(2.0f, -(max_bias / 2.0f) / n_head_log2);
+        const float m0 = powf(2.0f, -(max_bias ) / n_head_log2);
+        const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
 
         const int h = rowx/nrows_y; // head index
 
-        slope = h < n_head_log2 ? pow(m0, h + 1) : pow(m1, 2*(h - n_head_log2) + 1);
+        slope = h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1);
     }
 
     extern __shared__ float data_soft_max_f32[];
diff --git a/llama.cpp b/llama.cpp
index 1c58aba48..14d86cd87 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4818,7 +4818,7 @@ static struct ggml_tensor * llm_build_kqv(
 
 #if defined(GGML_USE_VULKAN) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_SYCL)
 #pragma message("TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan, Kompute, and SYCL")
-#pragma message(" Falling back to ggml_alibi(). Will become and error in Mar 2024")
+#pragma message(" Falling back to ggml_alibi(). Will become an error in Mar 2024")
 #pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5488")
     if (hparams.f_max_alibi_bias > 0.0f) {
         kq = ggml_scale(ctx, kq, kq_scale);