diff --git a/llama.cpp b/llama.cpp
index 25f129e57..e6ce4486b 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -54,6 +54,7 @@
 #include
 #include
 #include
+#include <cmath>
 #include
 #include
 #include
@@ -1735,7 +1736,7 @@ static void llm_load_hparams(llama_model_loader & ml, llama_model & model, const
 
     // rope_freq_scale (inverse of the kv) is optional
     if (rope_scaling_type == LLAMA_ROPE_SCALING_NONE) {
-        hparams.rope_freq_scale = 1.0f;
+        hparams.rope_freq_scale = 1.0f; // never scale if scaling type is none
     } else if (hparams.rope_freq_scale == 0.0f) {
         float ropescale = 0.0f;
         GGUF_GET_KEY(ctx, ropescale, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_SCALING_FACTOR));
@@ -1745,8 +1746,8 @@
         hparams.rope_freq_scale = ropescale == 0.0f ? 1.0f : 1.0f/ropescale;
     }
 
-    if (rope_scaling_type == LLAMA_ROPE_SCALING_YARN) {
-        hparams.yarn_ext_factor = 1.0f; // enable YaRN
+    if (std::isnan(hparams.yarn_ext_factor)) { // NaN indicates 'not set'
+        hparams.yarn_ext_factor = rope_scaling_type == LLAMA_ROPE_SCALING_YARN ? 1.0f : 0.0f;
     }
 
     // sanity check for n_rot (optional)
@@ -6268,7 +6269,7 @@ struct llama_context_params llama_context_default_params() {
         /*.tensor_split    =*/ nullptr,
         /*.rope_freq_base  =*/ 0.0f,
         /*.rope_freq_scale =*/ 0.0f,
-        /*.yarn_ext_factor =*/ 0.0f,
+        /*.yarn_ext_factor =*/ NAN,
         /*.yarn_attn_factor =*/ 1.0f,
         /*.yarn_beta_fast   =*/ 32.0f,
         /*.yarn_beta_slow   =*/ 1.0f,
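
The core idea of the patch is a NaN sentinel: 0.0f is itself a meaningful value for yarn_ext_factor (YaRN disabled), so it cannot also mean "the user left this unset". The default therefore becomes NAN, and at load time std::isnan() distinguishes "unset" (resolve a default from the rope scaling type) from an explicit user value (keep it). The following is a minimal standalone sketch of that pattern, not llama.cpp's actual API; the names params, apply_defaults, and is_yarn are illustrative stand-ins for the hparams struct and the LLAMA_ROPE_SCALING_YARN check above.

// Sketch of the NaN-as-"not set" sentinel pattern (hypothetical names).
#include <cmath>
#include <cstdio>

struct params {
    float yarn_ext_factor = NAN; // NaN means "user did not set this"
};

// Resolve the default only when the field was left unset.
// is_yarn stands in for rope_scaling_type == LLAMA_ROPE_SCALING_YARN.
static void apply_defaults(params & p, bool is_yarn) {
    if (std::isnan(p.yarn_ext_factor)) {
        p.yarn_ext_factor = is_yarn ? 1.0f : 0.0f;
    }
}

int main() {
    params a; // unset -> default is chosen by the scaling type
    apply_defaults(a, /*is_yarn=*/true);
    printf("%g\n", a.yarn_ext_factor); // prints 1

    params b;
    b.yarn_ext_factor = 0.5f; // explicit user value survives untouched
    apply_defaults(b, /*is_yarn=*/true);
    printf("%g\n", b.yarn_ext_factor); // prints 0.5
    return 0;
}

Note that NaN only works as a sentinel because it is never a valid factor and a plain equality test (x == NAN) is always false, so the check must go through std::isnan().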