Added backwards compatibility for an earlier version of the NeoX model format.

Concedo 2023-04-25 20:34:18 +08:00
parent bff998f871
commit 5eec5d6ed9
6 changed files with 49 additions and 21 deletions

View file

@@ -117,10 +117,16 @@ extern "C"
             return true;
         }
     }
-    else if(file_format==FileFormat::NEOX_1)
+    else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2)
     {
         printf("\n---\nIdentified as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
-        ModelLoadResult lr = gpttype_load_model(inputs, file_format);
+        ModelLoadResult lr = gpttype_load_model(inputs, file_format);
+        if (lr == ModelLoadResult::RETRY_LOAD)
+        {
+            file_format = FileFormat::NEOX_1;
+            printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            lr = gpttype_load_model(inputs, file_format);
+        }
         if (lr == ModelLoadResult::FAIL || lr == ModelLoadResult::RETRY_LOAD)
         {
             return false;
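Note on the fallback above: the detector (further down in this commit) now labels NeoX files as NEOX_2, so the newer layout is attempted first, and a RETRY_LOAD result from the loader downgrades the format exactly once. A second RETRY_LOAD has no older NeoX layout left to fall back to, which is why the final check treats it the same as FAIL. A minimal sketch of the control flow, with a hypothetical try_load standing in for gpttype_load_model:

    // Hypothetical condensation of the dispatcher logic above.
    ModelLoadResult lr = try_load(inputs, FileFormat::NEOX_2); // newest layout first
    if (lr == ModelLoadResult::RETRY_LOAD)
    {
        lr = try_load(inputs, FileFormat::NEOX_1);             // single legacy fallback
    }
    return (lr == ModelLoadResult::SUCCESS); // a leftover RETRY_LOAD is as fatal as FAIL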

View file

@@ -218,13 +218,18 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         return ModelLoadResult::SUCCESS;
     }
-    else if(file_format==FileFormat::NEOX_1)
+    else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2)
     {
-        bool res = stablelm_model_load(params.model, neox_ctx, vocab);
-        if(!res)
+        ModelLoadResult res = stablelm_model_load(params.model, neox_ctx, vocab, file_format);
+        if(res==ModelLoadResult::FAIL)
         {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
-            return ModelLoadResult::FAIL;
+            return res;
         }
+        else if(res==ModelLoadResult::RETRY_LOAD)
+        {
+            printf("\nIncorrect Tensor Size Detected! Retrying GPT-NeoX model loading...");
+            return res;
+        }
         // determine the required inference memory per token:
         stablelm_eval(neox_ctx, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
@@ -245,8 +250,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         }
         // determine the required inference memory per token:
-        gptj_eval(gptj_ctx_v2, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+        gptj_eval(gptj_ctx_v2, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
         //if the logits are NAN, it means the model is incompatible
         if(logits.size()>0 && IsNanCheck(logits[0]))
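The adapter change above is the other half of the handshake: stablelm_model_load now returns a tri-state ModelLoadResult instead of a bool, letting the adapter tell an unrecoverable failure apart from a recoverable tensor-size mismatch and bubble RETRY_LOAD up to the dispatcher. Annotated for clarity (the numeric values are assumed for illustration; only the enumerator names appear in this diff):

    enum ModelLoadResult
    {
        FAIL = 0,       // unrecoverable: bad magic, bad ftype, unknown tensor, ...
        SUCCESS = 1,    // model loaded and ready for stablelm_eval
        RETRY_LOAD = 2, // recoverable: tensor bytes matched an older on-disk layout
    };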

View file

@@ -148,7 +148,7 @@ maxctx = 2048
 maxlen = 128
 modelbusy = False
 defaultport = 5001
-KcppVersion = "1.13.1"
+KcppVersion = "1.14"
 class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
     sys_version = ""

View file

@@ -130,7 +130,7 @@ void print_tok_vec(std::vector<float> &embd)
         else if(vocabsiz < 31998 || vocabsiz > 33000)
         {
             //anything outside the llama v1 range is assumed to be NeoX
-            fileformat = FileFormat::NEOX_1;
+            fileformat = FileFormat::NEOX_2;
         }
     }
     else if(magic == 0x67676d66) //v2 format ggmf
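With this change the detector never emits NEOX_1 directly: any plain-'ggml' file (magic 0x67676d6c, little-endian) whose vocabulary size falls outside the llama v1 range is optimistically labeled NEOX_2, and genuinely old files are reached only through the RETRY_LOAD fallback. A sketch of the heuristic (helper name hypothetical; bounds taken from the condition above):

    // In-range vocab sizes are treated as llama v1; everything else under a
    // plain 'ggml' magic is assumed to be NeoX, newest revision first.
    bool in_llama_v1_vocab_range(int32_t vocabsiz)
    {
        return vocabsiz >= 31998 && vocabsiz <= 33000;
    }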

View file

@@ -30,6 +30,7 @@ enum FileFormat
     RWKV_1=300,
     NEOX_1=400,
+    NEOX_2=401,
 };
 enum ModelLoadResult
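The values suggest a convention: each architecture owns a block of one hundred IDs (RWKV at 300, NeoX at 400), and a revised on-disk layout takes the next value inside its family's block rather than opening a new one. NEOX_2=401 follows that pattern, annotated:

    enum FileFormat
    {
        // ...earlier families elided...
        RWKV_1=300, // RWKV family block
        NEOX_1=400, // original NeoX/StableLM layout, now the fallback
        NEOX_2=401, // current layout, tried first by the dispatcher
    };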

View file

@@ -17,13 +17,13 @@
 // load the model's weights from a file
-bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_vocab & vocab) {
+ModelLoadResult stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_vocab & vocab, FileFormat file_format) {
     printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
     auto fin = std::ifstream(fname, std::ios::binary);
     if (!fin) {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
-        return false;
+        return ModelLoadResult::FAIL;
     }
     // verify magic
@@ -32,7 +32,7 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
         fin.read((char *) &magic, sizeof(magic));
         if (magic != 0x67676d6c) {
             fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
-            return false;
+            return ModelLoadResult::FAIL;
         }
     }
@@ -88,7 +88,7 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
         {
             fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n",
                     __func__, fname.c_str(), model.hparams.ftype);
-            return false;
+            return ModelLoadResult::FAIL;
         }
     }
@@ -151,7 +151,7 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
     model.ctx = ggml_init(params);
     if (!model.ctx) {
         fprintf(stderr, "%s: ggml_init() failed\n", __func__);
-        return false;
+        return ModelLoadResult::FAIL;
     }
 }
@@ -276,19 +276,19 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
         if (model.tensors.find(name.data()) == model.tensors.end()) {
             fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data());
-            return false;
+            return ModelLoadResult::FAIL;
         }
         auto tensor = model.tensors[name.data()];
         if (ggml_nelements(tensor) != nelements) {
             fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
-            return false;
+            return ModelLoadResult::FAIL;
         }
         if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
             fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%5d, %5d], expected [%5d, %5d]\n",
                     __func__, name.data(), (int) tensor->ne[0], (int) tensor->ne[1], ne[0], ne[1]);
-            return false;
+            return ModelLoadResult::FAIL;
         }
         // for debugging
@@ -296,12 +296,29 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
             printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
         }
-        const size_t bpe = ggml_type_size(ggml_type(ttype));
+        size_t bpe = ggml_type_size(ggml_type(ttype));
+        if(file_format==FileFormat::NEOX_1)
+        {
+            switch (ttype) {
+                case 0: bpe = ggml_type_size(GGML_TYPE_F32);  break;
+                case 1: bpe = ggml_type_size(GGML_TYPE_F16);  break;
+                case 2: bpe = ggml_type_size(GGML_TYPE_Q4_0); assert(ne[0] % 64 == 0); break;
+                case 3: bpe = ggml_type_size(GGML_TYPE_Q4_1); assert(ne[0] % 64 == 0); break;
+                case 5: bpe = ggml_type_size(GGML_TYPE_Q4_2); assert(ne[0] % 64 == 0); break;
+                case 6: bpe = ggml_type_size(GGML_TYPE_Q4_3); assert(ne[0] % 64 == 0); break;
+                default:
+                {
+                    fprintf(stderr, "%s: unknown ftype %d in model file\n", __func__, ttype);
+                    return ModelLoadResult::FAIL;
+                }
+            };
+        }
         if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) {
             fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
                     __func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
-            return false;
+            return ModelLoadResult::RETRY_LOAD;
         }
         fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
@@ -320,7 +337,7 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
     fin.close();
-    return true;
+    return ModelLoadResult::SUCCESS;
 }
 // evaluate the transformer
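The compatibility shim itself is the bpe fixup above: NEOX_1 files carry a tensor-type field with its own numbering (including the short-lived Q4_2/Q4_3 quantizations), so under NEOX_1 the loader recomputes the bytes-per-block through that legacy table before checking sizes. The size check thereby doubles as a format probe, which is why a mismatch now returns RETRY_LOAD rather than false. A sketch of the arithmetic it relies on (not repo code):

    // Expected bytes = (elements / elements-per-block) * bytes-per-block.
    // bpe              = ggml_type_size(type)  -> bytes per block
    // ggml_blck_size() = elements per block (1 for F32/F16, 32 for Q4_x)
    size_t expected_bytes = (nelements * bpe) / ggml_blck_size(tensor->type);
    if (expected_bytes != ggml_nbytes(tensor)) {
        // Under NEOX_2 assumptions a disagreement most likely means the file
        // was written with the old type numbering; let the caller reload it
        // with file_format == FileFormat::NEOX_1 instead of hard-failing.
        return ModelLoadResult::RETRY_LOAD;
    }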