allow customized rope to use model set values
This commit is contained in:
parent
f4ee91abbb
commit
8b919b5b57
4 changed files with 28 additions and 18 deletions
|
@ -697,10 +697,12 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
//determine rope scaling params
|
//determine rope scaling params
|
||||||
float rope_freq_scale = 1.0f;
|
float rope_freq_scale = 1.0f;
|
||||||
float rope_freq_base = 10000.0f;
|
float rope_freq_base = 10000.0f;
|
||||||
|
bool overwriteRope = false;
|
||||||
if(inputs.rope_freq_scale>0.0f)
|
if(inputs.rope_freq_scale>0.0f)
|
||||||
{
|
{
|
||||||
rope_freq_scale = inputs.rope_freq_scale;
|
rope_freq_scale = inputs.rope_freq_scale;
|
||||||
rope_freq_base = inputs.rope_freq_base;
|
rope_freq_base = inputs.rope_freq_base;
|
||||||
|
overwriteRope = true;
|
||||||
printf("Using Custom RoPE scaling (scale:%.3f, base:%.1f).\n",rope_freq_scale,rope_freq_base);
|
printf("Using Custom RoPE scaling (scale:%.3f, base:%.1f).\n",rope_freq_scale,rope_freq_base);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -722,13 +724,9 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f :
|
rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f :
|
||||||
(effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : (effectivenctx <= 16384 ? 200000.0f : (effectivenctx <= 24576 ? 320000.0f : 440000.0f))))))));
|
(effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : (effectivenctx <= 16384 ? 200000.0f : (effectivenctx <= 24576 ? 320000.0f : 440000.0f))))))));
|
||||||
|
|
||||||
if(file_format_meta.freq_base_train > rope_freq_base)
|
|
||||||
{
|
|
||||||
rope_freq_base = file_format_meta.freq_base_train;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("Using automatic RoPE scaling (scale:%.3f, base:%.1f)\n",rope_freq_scale,rope_freq_base);
|
printf("Using automatic RoPE scaling. If the model has customized RoPE settings, they will be used directly instead!\n");
|
||||||
}
|
}
|
||||||
gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale;
|
gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale;
|
||||||
gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base;
|
gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base;
|
||||||
|
@ -903,8 +901,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
model_params.main_gpu = cu_parseinfo_maindevice;
|
model_params.main_gpu = cu_parseinfo_maindevice;
|
||||||
llama_ctx_params.rope_freq_base = rope_freq_base;
|
|
||||||
llama_ctx_params.rope_freq_scale = rope_freq_scale;
|
|
||||||
llama_ctx_params.n_batch = blasbatchsize;
|
llama_ctx_params.n_batch = blasbatchsize;
|
||||||
llama_ctx_params.n_threads = n_threads;
|
llama_ctx_params.n_threads = n_threads;
|
||||||
llama_ctx_params.n_threads_batch = n_blasthreads;
|
llama_ctx_params.n_threads_batch = n_blasthreads;
|
||||||
|
@ -932,6 +929,28 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_model * llamamodel = llama_load_model_from_file(modelname.c_str(), model_params);
|
llama_model * llamamodel = llama_load_model_from_file(modelname.c_str(), model_params);
|
||||||
|
if(overwriteRope)
|
||||||
|
{
|
||||||
|
llama_ctx_params.rope_freq_base = rope_freq_base;
|
||||||
|
llama_ctx_params.rope_freq_scale = rope_freq_scale;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//if the model modifies rope in any way, use the model values. Otherwise, use our automatic ones
|
||||||
|
if(llamamodel->hparams.rope_freq_base_train!=10000.0f ||
|
||||||
|
llamamodel->hparams.rope_freq_scale_train!=1.0f ||
|
||||||
|
llamamodel->hparams.rope_scaling_type_train==2)
|
||||||
|
{
|
||||||
|
printf("Automatic RoPE Scaling: Using model internal values.\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
llama_ctx_params.rope_freq_base = rope_freq_base;
|
||||||
|
llama_ctx_params.rope_freq_scale = rope_freq_scale;
|
||||||
|
printf("Automatic RoPE Scaling: Using (scale:%.3f, base:%.1f).\n", rope_freq_scale, rope_freq_base);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
llama_ctx_v4 = llama_new_context_with_model(llamamodel, llama_ctx_params);
|
llama_ctx_v4 = llama_new_context_with_model(llamamodel, llama_ctx_params);
|
||||||
|
|
||||||
if (llama_ctx_v4 == NULL)
|
if (llama_ctx_v4 == NULL)
|
||||||
|
|
|
@ -388,7 +388,7 @@ maxhordelen = 256
|
||||||
modelbusy = threading.Lock()
|
modelbusy = threading.Lock()
|
||||||
requestsinqueue = 0
|
requestsinqueue = 0
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.49"
|
KcppVersion = "1.50"
|
||||||
showdebug = True
|
showdebug = True
|
||||||
showsamplerwarning = True
|
showsamplerwarning = True
|
||||||
showmaxctxwarning = True
|
showmaxctxwarning = True
|
||||||
|
@ -1452,7 +1452,7 @@ def show_new_gui():
|
||||||
labels[idx].grid_forget()
|
labels[idx].grid_forget()
|
||||||
if usehorde_var.get()==1 and (horde_name_var.get()=="koboldcpp" or horde_name_var.get()=="") and model_var.get()!="":
|
if usehorde_var.get()==1 and (horde_name_var.get()=="koboldcpp" or horde_name_var.get()=="") and model_var.get()!="":
|
||||||
basefile = os.path.basename(model_var.get())
|
basefile = os.path.basename(model_var.get())
|
||||||
horde_name_var.set(os.path.splitext(basefile)[0])
|
horde_name_var.set(sanitize_string(os.path.splitext(basefile)[0]))
|
||||||
|
|
||||||
makecheckbox(network_tab, "Configure for Horde", usehorde_var, 6, command=togglehorde)
|
makecheckbox(network_tab, "Configure for Horde", usehorde_var, 6, command=togglehorde)
|
||||||
togglehorde(1,1,1)
|
togglehorde(1,1,1)
|
||||||
|
|
|
@ -290,14 +290,6 @@ void print_tok_vec(std::vector<float> &embd)
|
||||||
}
|
}
|
||||||
int filever = gguf_get_version(ctx);
|
int filever = gguf_get_version(ctx);
|
||||||
fileformatmeta->fileversion = filever;
|
fileformatmeta->fileversion = filever;
|
||||||
|
|
||||||
//try to adapt if the rope_freq_base_train exceeds the auto one
|
|
||||||
fkey = modelarch+".rope.freq_base";
|
|
||||||
keyidx = gguf_find_key(ctx, fkey.c_str());
|
|
||||||
if (keyidx != -1) {
|
|
||||||
float fbt = gguf_get_val_f32(ctx, keyidx);
|
|
||||||
fileformatmeta->freq_base_train = (fbt > 1.0f ? fbt : 0.0f);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,7 +55,6 @@ struct FileFormatExtraMeta
|
||||||
{
|
{
|
||||||
int n_ctx_train = 2048;
|
int n_ctx_train = 2048;
|
||||||
int fileversion = 0;
|
int fileversion = 0;
|
||||||
float freq_base_train = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ModelLoadResult
|
enum ModelLoadResult
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue