diff --git a/llama.cpp b/llama.cpp
index c2799ecb7..f563c52be 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -10946,15 +10946,10 @@ struct llm_build_context {
 
         bool is_lite = (hparams.n_layer == 27);
 
+        // We have to pre-scale kq_scale and attn_factor to make the YaRN RoPE work correctly.
+        // See https://github.com/ggerganov/llama.cpp/discussions/7416 for detailed explanation.
         const float mscale = 1.0f + 0.1f * hparams.mscale_all_dim * logf(1.0f / freq_scale);
         const float kq_scale = 1.0f*mscale*mscale/sqrtf(float(hparams.n_embd_head_k));
-
-        // DeepSeek-V2 uses non-standard YaRN mscale calculation from mscale and mscale_all_dim
-        // config.json parameters. However, both of these are equal to 0.707 in released models,
-        // which results in the final mscale value equal to 1.0. To get the same value we
-        // pre-scale the attn_factor.
-        // TODO Get rid of this when other models start using DeepSeek-V2
-        // variant of mscale calculation resulting in the API change.
         const float attn_factor_scaled = 1.0f / (1.0f + 0.1f * logf(1.0f / freq_scale));
 
         // kept original names of these parameters from HF transformers code for clarity
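
For context, here is a minimal standalone sketch (not part of the patch) of the pre-scaling arithmetic the comments describe. The value mscale_all_dim = 0.707 comes from the removed comment; freq_scale = 1/40 and n_embd_head_k = 192 are assumptions based on the released DeepSeek-V2 configuration, and the description of the RoPE op's internal scaling is inferred from the patch; see the linked discussion for the authoritative details.

    // sketch: why pre-scaling attn_factor cancels the standard YaRN mscale
    #include <cmath>
    #include <cstdio>

    int main() {
        const float mscale_all_dim = 0.707f;       // from config.json (per the removed comment)
        const float freq_scale     = 1.0f / 40.0f; // assumed: 1 / rope_scaling.factor
        const float n_embd_head_k  = 192.0f;       // assumed: qk_nope_head_dim + qk_rope_head_dim

        // DeepSeek-V2 variant of the YaRN magnitude scale, folded into kq_scale
        // exactly as in the patch.
        const float mscale   = 1.0f + 0.1f * mscale_all_dim * logf(1.0f / freq_scale);
        const float kq_scale = 1.0f * mscale * mscale / sqrtf(n_embd_head_k);

        // Standard YaRN mscale that (per the patch) the RoPE op applies
        // internally when YaRN extrapolation mixing is active.
        const float mscale_std = 1.0f + 0.1f * logf(1.0f / freq_scale);

        // Pre-scaling attn_factor by 1/mscale_std cancels that internal scaling,
        // leaving kq_scale as the only magnitude correction.
        const float attn_factor_scaled = 1.0f / mscale_std;

        printf("mscale                          = %.6f\n", mscale);             // ~1.2608
        printf("kq_scale                        = %.6f\n", kq_scale);           // ~0.1147
        printf("attn_factor_scaled * mscale_std = %.6f\n",
               attn_factor_scaled * mscale_std);                                // exactly 1
        return 0;
    }

The product attn_factor_scaled * mscale_std printing 1 is the whole point of the pre-scaling: the internal standard-YaRN factor is neutralized, so the DeepSeek-V2 variant of mscale carried by kq_scale is the only one that takes effect.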