From 1657f92d2ff2652d8e3e71badf8690fc15279af9 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 16 Feb 2024 10:41:38 +0200
Subject: [PATCH] llama : init kq_pos only if needed

ggml-ci
---
 llama.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 8e872f3d7..7d6c90872 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1561,6 +1561,7 @@ struct llama_hparams {
     float f_max_alibi_bias = 0.0f;
 
     bool causal_attn = true;
+    bool need_kq_pos = false;
 
     uint32_t pooling_type = LLAMA_POOLING_NONE;
 
@@ -3242,6 +3243,10 @@ static void llm_load_hparams(
     }
 
     model.ftype = ml.ftype;
+
+    if (hparams.f_max_alibi_bias > 0.0f) {
+        hparams.need_kq_pos = true;
+    }
 }
 
 // TODO: This should probably be in llama.h
@@ -7529,7 +7534,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
         }
     }
 
-    {
+    if (hparams.need_kq_pos) {
         const int64_t n_kv = kv_self.n;
 
         assert(ggml_backend_buffer_is_host(lctx.inp_KQ_pos->buffer));