From df9135e3a9a6708bb62e6484d239e2b4ea212ed7 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Fri, 23 Jun 2023 18:41:23 +0800
Subject: [PATCH] fixing memory bugs

---
 gpttype_adapter.cpp    | 8 ++++++--
 koboldcpp.py           | 2 +-
 llama.cpp              | 4 ++--
 model_adapter.cpp      | 2 +-
 otherarch/llama_v2.cpp | 4 ++--
 5 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index d0ddaf99b..4e087bd65 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -308,8 +308,12 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     params.memory_f16 = inputs.f16_kv;
     params.n_ctx = inputs.max_context_length;
 
-    neox_ctx_v2.hparams.n_ctx = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gpt2_ctx_v1.hparams.n_ctx = gpt2_ctx_v2.hparams.n_ctx
-    = neox_ctx_v3.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx = mpt_ctx_v3.hparams.n_ctx = params.n_ctx;
+    neox_ctx_v2.hparams.n_ctx = neox_ctx_v3.hparams.n_ctx
+    = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx
+    = gpt2_ctx_v1.hparams.n_ctx = gpt2_ctx_v2.hparams.n_ctx = gpt2_ctx_v3.hparams.n_ctx
+    = mpt_ctx_v3.hparams.n_ctx = params.n_ctx;
+
+    bool calc_mem_with_scratch = ggml_cpu_has_gpublas();
 
     printf("System Info: %s\n", llama_print_system_info());
     SetQuantsUnshuffled(false);
diff --git a/koboldcpp.py b/koboldcpp.py
index aa5426262..76e94b84a 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -225,7 +225,7 @@ maxhordectx = 1024
 maxhordelen = 256
 modelbusy = False
 defaultport = 5001
-KcppVersion = "1.32"
+KcppVersion = "1.32.1"
 showdebug = True
 
 class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
diff --git a/llama.cpp b/llama.cpp
index 27d3d4a0a..aa67038e0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -80,7 +80,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
         { MODEL_3B,    256ull * MB },
         { MODEL_7B,    512ull * MB },
         { MODEL_13B,   512ull * MB },
-        { MODEL_30B,   512ull * MB },
+        { MODEL_30B,   640ull * MB },
         { MODEL_65B,  1024ull * MB },
     };
     return k_sizes;
@@ -92,7 +92,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
         { MODEL_3B,    256ull * MB },
         { MODEL_7B,    512ull * MB },
         { MODEL_13B,   512ull * MB },
-        { MODEL_30B,   512ull * MB },
+        { MODEL_30B,   640ull * MB },
         { MODEL_65B,  1024ull * MB },
     };
     return k_sizes;
diff --git a/model_adapter.cpp b/model_adapter.cpp
index 547a8a1ef..da9fa193e 100644
--- a/model_adapter.cpp
+++ b/model_adapter.cpp
@@ -98,7 +98,7 @@ void print_tok_vec(std::vector<int> &embd)
         //we need to read more to determine
         int32_t vocabsiz = 0;
         fin.read((char *) &vocabsiz, sizeof(int32_t));
-        if(vocabsiz==4096) //actually the d_model for mpt
+        if(vocabsiz==4096 || vocabsiz==7168) //actually the d_model for mpt
         {
             fileformat = FileFormat::MPT_1;
         }
diff --git a/otherarch/llama_v2.cpp b/otherarch/llama_v2.cpp
index 167f3e9c3..2f8e168ca 100644
--- a/otherarch/llama_v2.cpp
+++ b/otherarch/llama_v2.cpp
@@ -59,7 +59,7 @@ static const std::map<e_model2, size_t> & MEM_REQ_SCRATCH0_2()
         { MODEL_UNKNOWN_2, 512ull * MB_2 },
         { MODEL_7B_2,      512ull * MB_2 },
         { MODEL_13B_2,     512ull * MB_2 },
-        { MODEL_30B_2,     512ull * MB_2 },
+        { MODEL_30B_2,     640ull * MB_2 },
         { MODEL_65B_2,    1024ull * MB_2 },
     };
     return k_sizes;
@@ -71,7 +71,7 @@ static const std::map<e_model2, size_t> & MEM_REQ_SCRATCH1_2()
         { MODEL_UNKNOWN_2, 512ull * MB_2 },
         { MODEL_7B_2,      512ull * MB_2 },
         { MODEL_13B_2,     512ull * MB_2 },
-        { MODEL_30B_2,     512ull * MB_2 },
+        { MODEL_30B_2,     640ull * MB_2 },
         { MODEL_65B_2,    1024ull * MB_2 },
     };
     return k_sizes;