From 5eec5d6ed9dfabf94dda4fd4705c4015ad0e82e2 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Tue, 25 Apr 2023 20:34:18 +0800
Subject: [PATCH] Added backwards compatibility to an earlier version of NeoX.

---
 expose.cpp          | 10 ++++++++--
 gpttype_adapter.cpp | 16 ++++++++++------
 koboldcpp.py        |  2 +-
 model_adapter.cpp   |  2 +-
 model_adapter.h     |  1 +
 otherarch/neox.cpp  | 39 ++++++++++++++++++++++++++++-----------
 6 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/expose.cpp b/expose.cpp
index 02f40b489..b19737106 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -117,10 +117,16 @@ extern "C"
             return true;
         }
     }
-    else if(file_format==FileFormat::NEOX_1)
+    else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2)
     {
         printf("\n---\nIdentified as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
-        ModelLoadResult lr = gpttype_load_model(inputs, file_format);
+        ModelLoadResult lr = gpttype_load_model(inputs, file_format);
+        if (lr == ModelLoadResult::RETRY_LOAD)
+        {
+            file_format = FileFormat::NEOX_1;
+            printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            lr = gpttype_load_model(inputs, file_format);
+        }
         if (lr == ModelLoadResult::FAIL || lr == ModelLoadResult::RETRY_LOAD)
         {
             return false;
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index df185405f..1d33a2243 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -218,13 +218,18 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
 
         return ModelLoadResult::SUCCESS;
     }
-    else if(file_format==FileFormat::NEOX_1)
+    else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2)
     {
-        bool res = stablelm_model_load(params.model, neox_ctx, vocab);
-        if(!res)
+        ModelLoadResult res = stablelm_model_load(params.model, neox_ctx, vocab, file_format);
+        if(res==ModelLoadResult::FAIL)
         {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
-            return ModelLoadResult::FAIL;
+            return res;
+        }
+        else if(res==ModelLoadResult::RETRY_LOAD)
+        {
+            printf("\nIncorrect Tensor Size Detected! Retrying GPT-NeoX model loading...");
+            return res;
         }
 
         // determine the required inference memory per token:
         stablelm_eval(neox_ctx, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
@@ -245,8 +250,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         }
 
         // determine the required inference memory per token:
-        gptj_eval(gptj_ctx_v2, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
-
+        gptj_eval(gptj_ctx_v2, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
         //if the logits are NAN, it means the model is incompatible
         if(logits.size()>0 && IsNanCheck(logits[0]))
diff --git a/koboldcpp.py b/koboldcpp.py
index 8310b7566..35e8d2cbd 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -148,7 +148,7 @@ maxctx = 2048
 maxlen = 128
 modelbusy = False
 defaultport = 5001
-KcppVersion = "1.13.1"
+KcppVersion = "1.14"
 
 class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
     sys_version = ""
diff --git a/model_adapter.cpp b/model_adapter.cpp
index f6225dc55..6c54f3041 100644
--- a/model_adapter.cpp
+++ b/model_adapter.cpp
@@ -130,7 +130,7 @@ void print_tok_vec(std::vector &embd)
         else if(vocabsiz < 31998 || vocabsiz > 33000)
         {
             //anything outside the llama v1 range is assumed to be NeoX
-            fileformat = FileFormat::NEOX_1;
+            fileformat = FileFormat::NEOX_2;
         }
     }
     else if(magic == 0x67676d66) //v2 format ggmf
diff --git a/model_adapter.h b/model_adapter.h
index 344643d2b..5c303a1fd 100644
--- a/model_adapter.h
+++ b/model_adapter.h
@@ -30,6 +30,7 @@ enum FileFormat
     RWKV_1=300,
 
     NEOX_1=400,
+    NEOX_2=401,
 };
 
 enum ModelLoadResult
diff --git a/otherarch/neox.cpp b/otherarch/neox.cpp
index 203df0983..07eb26dbc 100644
--- a/otherarch/neox.cpp
+++ b/otherarch/neox.cpp
@@ -17,13 +17,13 @@
 
 
 // load the model's weights from a file
-bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_vocab & vocab) {
+ModelLoadResult stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_vocab & vocab, FileFormat file_format) {
     printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
 
     auto fin = std::ifstream(fname, std::ios::binary);
     if (!fin) {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
-        return false;
+        return ModelLoadResult::FAIL;
     }
 
     // verify magic
@@ -32,7 +32,7 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
         fin.read((char *) &magic, sizeof(magic));
         if (magic != 0x67676d6c) {
             fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
-            return false;
+            return ModelLoadResult::FAIL;
         }
     }
 
@@ -88,7 +88,7 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
         {
             fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n",
                     __func__, fname.c_str(), model.hparams.ftype);
-            return false;
+            return ModelLoadResult::FAIL;
         }
     }
 
@@ -151,7 +151,7 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
         model.ctx = ggml_init(params);
         if (!model.ctx) {
             fprintf(stderr, "%s: ggml_init() failed\n", __func__);
-            return false;
+            return ModelLoadResult::FAIL;
         }
     }
 
@@ -276,19 +276,19 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
 
         if (model.tensors.find(name.data()) == model.tensors.end()) {
             fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data());
-            return false;
+            return ModelLoadResult::FAIL;
         }
 
         auto tensor = model.tensors[name.data()];
         if (ggml_nelements(tensor) != nelements) {
             fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
-            return false;
+            return ModelLoadResult::FAIL;
         }
 
         if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
             fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%5d, %5d], expected [%5d, %5d]\n",
                     __func__, name.data(), (int) tensor->ne[0], (int) tensor->ne[1], ne[0], ne[1]);
-            return false;
+            return ModelLoadResult::FAIL;
         }
 
         // for debugging
@@ -296,12 +296,29 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
             printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
         }
 
-        const size_t bpe = ggml_type_size(ggml_type(ttype));
+        size_t bpe = ggml_type_size(ggml_type(ttype));
+
+        if(file_format==FileFormat::NEOX_1)
+        {
+            switch (ttype) {
+                case 0: bpe = ggml_type_size(GGML_TYPE_F32);  break;
+                case 1: bpe = ggml_type_size(GGML_TYPE_F16);  break;
+                case 2: bpe = ggml_type_size(GGML_TYPE_Q4_0); assert(ne[0] % 64 == 0); break;
+                case 3: bpe = ggml_type_size(GGML_TYPE_Q4_1); assert(ne[0] % 64 == 0); break;
+                case 5: bpe = ggml_type_size(GGML_TYPE_Q4_2); assert(ne[0] % 64 == 0); break;
+                case 6: bpe = ggml_type_size(GGML_TYPE_Q4_3); assert(ne[0] % 64 == 0); break;
+                default:
+                {
+                    fprintf(stderr, "%s: unknown ftype %d in model file\n", __func__, ttype);
+                    return ModelLoadResult::FAIL;
+                }
+            };
+        }
 
         if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) {
             fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
                     __func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
-            return false;
+            return ModelLoadResult::RETRY_LOAD;
         }
 
         fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
@@ -320,7 +337,7 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
 
     fin.close();
 
-    return true;
+    return ModelLoadResult::SUCCESS;
 }
 
 // evaluate the transformer