diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index ca64e2021..a02b0a45f 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -606,6 +606,10 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : (effectivenctx <= 16384 ? 200000.0f : (effectivenctx <= 24576 ? 320000.0f : 440000.0f)))))))); + if(file_format_meta.freq_base_train > rope_freq_base) + { + rope_freq_base = file_format_meta.freq_base_train; + } } printf("Using automatic RoPE scaling (scale:%.3f, base:%.1f)\n",rope_freq_scale,rope_freq_base); diff --git a/model_adapter.cpp b/model_adapter.cpp index ec57d4c9c..144b2df2b 100644 --- a/model_adapter.cpp +++ b/model_adapter.cpp @@ -290,6 +290,14 @@ void print_tok_vec(std::vector &embd) } int filever = gguf_get_version(ctx); fileformatmeta->fileversion = filever; + + //try to adapt if the rope_freq_base_train exceeds the auto one + fkey = modelarch+".rope.freq_base"; + auto keyidx = gguf_find_key(ctx, fkey.c_str()); + if (keyidx != -1) { + float fbt = gguf_get_val_f32(ctx, keyidx); + fileformatmeta->freq_base_train = (fbt > 1.0f ? fbt : 0.0f); + } } gguf_free(ctx); } diff --git a/model_adapter.h b/model_adapter.h index 65536c6d4..bb859dfa1 100644 --- a/model_adapter.h +++ b/model_adapter.h @@ -55,6 +55,7 @@ struct FileFormatExtraMeta { int n_ctx_train = 2048; int fileversion = 0; + float freq_base_train = 0; }; enum ModelLoadResult