From 82cec8b84b462da50857d08b4c1059a9cae98dd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?=
Date: Mon, 27 May 2024 14:33:31 +0200
Subject: [PATCH] llama : use attn_factor in mscale calculation to match the
 rope_yarn() implementation

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 7587c5d59..fcec63cfb 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11186,7 +11186,7 @@ struct llm_build_context {
         // We have to pre-scale kq_scale and attn_factor to make the YaRN RoPE work correctly.
         // See https://github.com/ggerganov/llama.cpp/discussions/7416 for detailed explanation.
-        const float mscale = 1.0f + hparams.rope_yarn_log_mul * logf(1.0f / freq_scale);
+        const float mscale = attn_factor * (1.0f + hparams.rope_yarn_log_mul * logf(1.0f / freq_scale));
         const float kq_scale = 1.0f*mscale*mscale/sqrtf(float(hparams.n_embd_head_k));
         const float attn_factor_scaled = 1.0f / (1.0f + 0.1f * logf(1.0f / freq_scale));
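
For context, the fix makes the host-side mscale include attn_factor, matching the magnitude scaling that rope_yarn() applies, where its attn_factor argument is multiplied by the same logarithmic YaRN term. Below is a minimal standalone sketch of that scaling; it is not llama.cpp code, and the freq_scale, attn_factor, rope_yarn_log_mul, and head-dimension values are made-up illustrations.

// Standalone sketch: how the corrected mscale feeds into kq_scale.
// Assumes the YaRN magnitude term has the form used in the patch above:
//   mscale = attn_factor * (1 + rope_yarn_log_mul * ln(1 / freq_scale))
#include <cmath>
#include <cstdio>

int main() {
    const float freq_scale        = 0.25f;  // illustrative: 4x context extension
    const float attn_factor       = 1.0f;   // illustrative default
    const float rope_yarn_log_mul = 0.1f;   // illustrative hparams value
    const float n_embd_head_k     = 128.0f; // illustrative head dimension

    // Before the patch, attn_factor was missing from mscale:
    const float mscale_old = 1.0f + rope_yarn_log_mul * logf(1.0f / freq_scale);

    // After the patch, attn_factor pre-multiplies the YaRN term,
    // matching what rope_yarn() does with its attn_factor argument:
    const float mscale_new = attn_factor * (1.0f + rope_yarn_log_mul * logf(1.0f / freq_scale));

    // mscale enters the attention scale squared:
    const float kq_scale = 1.0f*mscale_new*mscale_new/sqrtf(n_embd_head_k);

    printf("mscale old=%f new=%f kq_scale=%f\n", mscale_old, mscale_new, kq_scale);
    return 0;
}

With attn_factor == 1.0f the two mscale values coincide, which is why the bug only surfaces when a non-default attn_factor is in play; the squared mscale in kq_scale then diverges from what rope_yarn() produces.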