hotfix to revert the auto ctx scaling first, I didn't do it properly
parent 5cd0309610
commit b6914ebd04

2 changed files with 6 additions and 6 deletions
gpttype_adapter.cpp
@@ -435,11 +435,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
 {
     //approximate NTK aware ctx
     auto effectivenctx = params.n_ctx;
-    if((file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON) && llama_ctx_v4->model.hparams.n_ctx_train>2048)
-    {
-        float factor = llama_ctx_v4->model.hparams.n_ctx_train/2048;
-        effectivenctx = effectivenctx/factor;
-    }
+    // if((file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON) && llama_ctx_v4->model.hparams.n_ctx_train>2048)
+    // {
+    //     float factor = llama_ctx_v4->model.hparams.n_ctx_train/2048;
+    //     effectivenctx = effectivenctx/factor;
+    // }
     rope_freq_base = (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : 200000.0f)))));

 }
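For context, a minimal standalone sketch of what this hunk changes: with the training-context division commented out, rope_freq_base is picked from the user-requested context directly instead of from a context shrunk by n_ctx_train/2048. The function name pick_rope_freq_base, the variables n_ctx and n_ctx_train, and the example values below are hypothetical and only illustrate the effect; they are not the repo's actual API.

#include <cstdio>

// Same ternary ladder as the diff: larger effective contexts map to a
// larger RoPE frequency base (the NTK-aware scaling table).
static float pick_rope_freq_base(int effectivenctx)
{
    return (effectivenctx <= 3072 ? 26000.0f :
           (effectivenctx <= 4096 ? 32000.0f :
           (effectivenctx <= 6144 ? 54000.0f :
           (effectivenctx <= 8192 ? 82684.0f :
           (effectivenctx <= 12288 ? 140000.0f : 200000.0f)))));
}

int main()
{
    int n_ctx = 8192;        // user-requested context size (hypothetical value)
    int n_ctx_train = 4096;  // model's training context (hypothetical value)

    // After this hotfix: the requested context picks the base directly.
    std::printf("kept:     %.0f\n", pick_rope_freq_base(n_ctx));             // 82684

    // The reverted code first divided by n_ctx_train/2048 (integer math in
    // the original), shrinking the effective context before the lookup.
    int factor = n_ctx_train / 2048;                                         // = 2
    std::printf("reverted: %.0f\n", pick_rope_freq_base(n_ctx / factor));    // 32000
    return 0;
}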
koboldcpp.py
@@ -324,7 +324,7 @@ maxhordectx = 1024
 maxhordelen = 256
 modelbusy = threading.Lock()
 defaultport = 5001
-KcppVersion = "1.42"
+KcppVersion = "1.42.1"
 showdebug = True
 showsamplerwarning = True
 showmaxctxwarning = True