cuda : fix performance (pow -> powf)
This commit is contained in:
parent
b2c055b8af
commit
113e0d5d1b
2 changed files with 4 additions and 4 deletions
|
@@ -5976,12 +5976,12 @@ static __global__ void soft_max_f32(const float * x, const float * mask, const f
|
|||
const uint32_t n_head_kv = gridDim.x/nrows_y;
|
||||
const uint32_t n_head_log2 = 1u << (uint32_t) floor(log2((float) n_head_kv));
|
||||
|
||||
const float m0 = pow(2.0f, -(max_bias ) / n_head_log2);
|
||||
const float m1 = pow(2.0f, -(max_bias / 2.0f) / n_head_log2);
|
||||
const float m0 = powf(2.0f, -(max_bias ) / n_head_log2);
|
||||
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
|
||||
|
||||
const int h = rowx/nrows_y; // head index
|
||||
|
||||
slope = h < n_head_log2 ? pow(m0, h + 1) : pow(m1, 2*(h - n_head_log2) + 1);
|
||||
slope = h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1);
|
||||
}
|
||||
|
||||
extern __shared__ float data_soft_max_f32[];
|
||||
|
|
|
@@ -4818,7 +4818,7 @@ static struct ggml_tensor * llm_build_kqv(
|
|||
|
||||
#if defined(GGML_USE_VULKAN) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_SYCL)
|
||||
#pragma message("TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan, Kompute, and SYCL")
|
||||
#pragma message(" Falling back to ggml_alibi(). Will become and error in Mar 2024")
|
||||
#pragma message(" Falling back to ggml_alibi(). Will become an error in Mar 2024")
|
||||
#pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5488")
|
||||
if (hparams.f_max_alibi_bias > 0.0f) {
|
||||
kq = ggml_scale(ctx, kq, kq_scale);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue