diff --git a/llama.cpp b/llama.cpp index ef9809ecd..a30593087 100644 --- a/llama.cpp +++ b/llama.cpp @@ -10978,6 +10978,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { GGML_ASSERT(hparams.rope_long_factors.size() == freq_dim); GGML_ASSERT(hparams.rope_short_factors.size() == freq_dim); + // choose long/short freq factors based on the context size auto n_ctx = llama_n_ctx(&lctx); if (n_ctx > hparams.n_yarn_orig_ctx) { ggml_backend_tensor_set(lctx.freq_factors, hparams.rope_long_factors.data(), 0, freq_dim * ggml_element_size(lctx.freq_factors));