diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 08b60b3ef..cbbd2e7df 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -228,7 +228,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     if(file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2)
     {
         //newer format has bit unshuffling
-        SetQuantsUnshuffled(file_format== FileFormat::GGJT_2);
+        SetQuantsUnshuffled(file_format == FileFormat::GGJT_2);

         llama_ctx_params = llama_context_default_params();
         llama_ctx_params.n_ctx = inputs.max_context_length;
@@ -248,7 +248,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         }
         if (file_format < FileFormat::GGJT_2)
         {
-            printf("\n---\nWarning: Your model has an INVALID or OUTDATED format (ver %d). Please reconvert it for better results!\n---\n", file_format);
+            printf("\n---\nWarning: Your model may be an OUTDATED format (ver %d). Please reconvert it for better results!\n---\n", file_format);
         }

         if (lora_filename != "")
diff --git a/model_adapter.cpp b/model_adapter.cpp
index 3c0543724..487feb043 100644
--- a/model_adapter.cpp
+++ b/model_adapter.cpp
@@ -146,9 +146,17 @@ void print_tok_vec(std::vector &embd)
         else if(magic == 0x67676a74) //v3 format ggjt
         {
             fileformat = FileFormat::GGJT_2; //ggjt by default
-            uint32_t temp;
-            fin.read((char *)&temp, sizeof(temp)); //file version
-            if(temp==1)
+            uint32_t ver, temp, ftype;
+            fin.read((char *)&ver, sizeof(ver)); //file version
+            fin.read((char *)&temp, sizeof(temp)); //vocab
+            fin.read((char *)&temp, sizeof(temp)); //embd
+            fin.read((char *)&temp, sizeof(temp)); //mult
+            fin.read((char *)&temp, sizeof(temp)); //head
+            fin.read((char *)&temp, sizeof(temp)); //layer
+            fin.read((char *)&temp, sizeof(temp)); //rot
+            fin.read((char *)&ftype, sizeof(ftype)); //filetype
+
+            if(ver==1 || ftype==7) //q8 formats treat as old one
            {
                fileformat = FileFormat::GGJT;
            }
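
For reference, below is a minimal standalone sketch of the detection logic the model_adapter.cpp hunk introduces: probe the GGJT header, then treat version-1 files and ftype 7 files (the q8 case named in the patch comment) as the older GGJT format. The probe_ggjt() helper and the local FileFormat enum are illustrative names only, not the project's real declarations; the header field order (version, then the six hparams, then ftype) is taken from the read sequence in the patch.

    // Illustrative sketch, not part of the patch.
    #include <cstdint>
    #include <fstream>
    #include <string>

    enum class FileFormat { BADFORMAT, GGJT, GGJT_2 }; // illustrative subset

    FileFormat probe_ggjt(const std::string &path)
    {
        std::ifstream fin(path, std::ios::binary);
        uint32_t magic = 0;
        fin.read((char *)&magic, sizeof(magic));
        if (magic != 0x67676a74)
            return FileFormat::BADFORMAT; // not a ggjt file

        // Header layout assumed from the patch: version, then
        // vocab/embd/mult/head/layer/rot, then the ftype field.
        uint32_t ver = 0, temp = 0, ftype = 0;
        fin.read((char *)&ver, sizeof(ver));       // file version
        for (int i = 0; i < 6; ++i)
            fin.read((char *)&temp, sizeof(temp)); // skip the six hparams
        fin.read((char *)&ftype, sizeof(ftype));   // quantization type

        // Per the patch, v1 files and ftype 7 (q8) are treated as the
        // older GGJT format; everything else stays GGJT_2.
        return (ver == 1 || ftype == 7) ? FileFormat::GGJT : FileFormat::GGJT_2;
    }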