cuda : fix performance (pow -> powf)

This commit is contained in:
Georgi Gerganov 2024-02-15 16:02:24 +02:00
parent b2c055b8af
commit 113e0d5d1b
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 4 additions and 4 deletions

View file

@@ -5976,12 +5976,12 @@ static __global__ void soft_max_f32(const float * x, const float * mask, const f
const uint32_t n_head_kv = gridDim.x/nrows_y; const uint32_t n_head_kv = gridDim.x/nrows_y;
const uint32_t n_head_log2 = 1u << (uint32_t) floor(log2((float) n_head_kv)); const uint32_t n_head_log2 = 1u << (uint32_t) floor(log2((float) n_head_kv));
const float m0 = pow(2.0f, -(max_bias ) / n_head_log2); const float m0 = powf(2.0f, -(max_bias ) / n_head_log2);
const float m1 = pow(2.0f, -(max_bias / 2.0f) / n_head_log2); const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
const int h = rowx/nrows_y; // head index const int h = rowx/nrows_y; // head index
slope = h < n_head_log2 ? pow(m0, h + 1) : pow(m1, 2*(h - n_head_log2) + 1); slope = h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1);
} }
extern __shared__ float data_soft_max_f32[]; extern __shared__ float data_soft_max_f32[];

View file

@@ -4818,7 +4818,7 @@ static struct ggml_tensor * llm_build_kqv(
#if defined(GGML_USE_VULKAN) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_SYCL) #if defined(GGML_USE_VULKAN) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_SYCL)
#pragma message("TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan, Kompute, and SYCL") #pragma message("TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan, Kompute, and SYCL")
#pragma message(" Falling back to ggml_alibi(). Will become and error in Mar 2024") #pragma message(" Falling back to ggml_alibi(). Will become an error in Mar 2024")
#pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5488") #pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5488")
if (hparams.f_max_alibi_bias > 0.0f) { if (hparams.f_max_alibi_bias > 0.0f) {
kq = ggml_scale(ctx, kq, kq_scale); kq = ggml_scale(ctx, kq, kq_scale);