wip

parent e01e373e63
commit b692e4d2a4

8 changed files with 99 additions and 25 deletions
expose.cpp (36 changes)

@@ -63,7 +63,7 @@ extern "C"
     putenv((char*)deviceenv.c_str());
     executable_path = inputs.executable_path;

-    if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3)
+    if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3 || file_format==FileFormat::GPTJ_4)
     {
         printf("\n---\nIdentified as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
         ModelLoadResult lr = gpttype_load_model(inputs, file_format);
@@ -73,11 +73,18 @@ extern "C"
         {
             //if we tried 1 first, then try 3 and lastly 2
             //otherwise if we tried 3 first, then try 2
-            file_format = FileFormat::GPTJ_3;
+            file_format = FileFormat::GPTJ_4;
             printf("\n---\nRetrying as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             lr = gpttype_load_model(inputs, file_format);
         }

+        if (lr == ModelLoadResult::RETRY_LOAD)
+        {
+            file_format = FileFormat::GPTJ_3;
+            printf("\n---\nRetrying as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            lr = gpttype_load_model(inputs, file_format);
+        }
+
         //lastly try format 2
         if (lr == ModelLoadResult::RETRY_LOAD)
         {
@@ -96,11 +103,17 @@ extern "C"
             return true;
         }
     }
-    else if(file_format==FileFormat::GPT2_1||file_format==FileFormat::GPT2_2)
+    else if(file_format==FileFormat::GPT2_1||file_format==FileFormat::GPT2_2||file_format==FileFormat::GPT2_3)
     {
         printf("\n---\nIdentified as GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
         ModelLoadResult lr = gpttype_load_model(inputs, file_format);
         if (lr == ModelLoadResult::RETRY_LOAD)
+        {
+            file_format = FileFormat::GPT2_3;
+            printf("\n---\nRetrying as GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            lr = gpttype_load_model(inputs, file_format);
+        }
+        if (lr == ModelLoadResult::RETRY_LOAD)
         {
             file_format = FileFormat::GPT2_2;
             printf("\n---\nRetrying as GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
@@ -128,15 +141,24 @@ extern "C"
             return true;
         }
     }
-    else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2 || file_format==FileFormat::NEOX_3)
+    else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2 || file_format==FileFormat::NEOX_3 || file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5)
     {
         printf("\n---\nIdentified as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
         ModelLoadResult lr = gpttype_load_model(inputs, file_format);
         if (lr == ModelLoadResult::RETRY_LOAD)
         {
-            file_format = FileFormat::NEOX_3;
-            printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
-            lr = gpttype_load_model(inputs, file_format);
+            if(file_format==FileFormat::NEOX_2)
+            {
+                file_format = FileFormat::NEOX_3;
+                printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                lr = gpttype_load_model(inputs, file_format);
+            }
+            else
+            {
+                file_format = FileFormat::NEOX_5;
+                printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                lr = gpttype_load_model(inputs, file_format);
+            }
         }
         if (lr == ModelLoadResult::RETRY_LOAD)
         {
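The pattern above is a retry cascade: each model family now tries its newest "unshuffled" format first and falls back through the older variants whenever the loader reports RETRY_LOAD. A minimal self-contained sketch of the same control flow; try_load and its stub behavior are hypothetical stand-ins for gpttype_load_model:

#include <cstdio>

enum class Format { GPTJ_4, GPTJ_3, GPTJ_2 };         // newest candidate first
enum class LoadResult { SUCCESS, FAIL, RETRY_LOAD };

// Hypothetical stand-in for gpttype_load_model(): pretend only the
// oldest format succeeds, so the whole cascade is exercised.
LoadResult try_load(Format f)
{
    return (f == Format::GPTJ_2) ? LoadResult::SUCCESS : LoadResult::RETRY_LOAD;
}

int main()
{
    const Format candidates[] = { Format::GPTJ_4, Format::GPTJ_3, Format::GPTJ_2 };
    LoadResult lr = LoadResult::FAIL;
    for (Format f : candidates)
    {
        lr = try_load(f);
        if (lr != LoadResult::RETRY_LOAD)
            break; // definitive success or failure: stop falling back
        printf("Retrying with an older format...\n");
    }
    return (lr == LoadResult::SUCCESS) ? 0 : 1;
}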
ggml.h (2 changes)

@@ -190,6 +190,8 @@
 #define GGML_FILE_MAGIC 0x67676d6c // "ggml"
 #define GGML_FILE_VERSION 1

+#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
+
 #define GGML_MAX_DIMS 4
 #define GGML_MAX_NODES 4096
 #define GGML_MAX_PARAMS 256
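The new GGML_QNT_VERSION_FACTOR constant lets a single ftype/f16 header field carry both a quantization version and the original ftype: the version is stored in multiples of 1000 on top of the ftype value. A sketch of the pack/unpack arithmetic as the loader hunks below decode it; the packed values here are illustrative, not taken from a real file:

#include <cassert>
#include <cstdint>

constexpr int32_t GGML_QNT_VERSION_FACTOR = 1000; // matches the new define

int main()
{
    // Writer side: quant version 1 and ftype 2 packed into one field
    // (illustrative values).
    int32_t ftype_field = 1 * GGML_QNT_VERSION_FACTOR + 2;

    // Reader side: the decode the updated model loaders perform.
    const int32_t qntvr = ftype_field / GGML_QNT_VERSION_FACTOR;
    ftype_field %= GGML_QNT_VERSION_FACTOR;

    assert(qntvr == 1 && ftype_field == 2);
    // Old files never reach the factor, so they decode as qntvr == 0.
    return 0;
}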
@@ -329,8 +329,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         legacy_gpt2_eval(gpt2_ctx_v1, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token, file_format);
         return ModelLoadResult::SUCCESS;
     }
-    else if (file_format == FileFormat::GPT2_2)
+    else if (file_format == FileFormat::GPT2_2 || file_format==FileFormat::GPT2_3)
     {
+        //newer format has bit unshuffling
+        SetQuantsUnshuffled(file_format == FileFormat::GPT2_3);
+
         ModelLoadResult res = gpt2_model_load(params.model, gpt2_ctx_v2, vocab, file_format);
         if(res==ModelLoadResult::FAIL)
         {
@@ -372,7 +375,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in

         return ModelLoadResult::SUCCESS;
     }
-    else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2 || file_format==FileFormat::NEOX_3)
+    else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2 || file_format==FileFormat::NEOX_3 || file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5)
     {
         ModelLoadResult res = stablelm_model_load(params.model, neox_ctx, vocab, file_format);
         if(res==ModelLoadResult::FAIL)
@@ -385,14 +388,18 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
             printf("\nIncorrect Tensor Size Detected! Retrying GPT-NeoX model loading...");
             return res;
         }

+        //newer format has bit unshuffling
+        SetQuantsUnshuffled(file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5);
+
         // determine the required inference memory per token:
         stablelm_eval(neox_ctx, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token, file_format);

-        if(logits.size()>0 && file_format==FileFormat::NEOX_2 && !IsNanCheck(logits[0]))
+        if(logits.size()>0 && (file_format==FileFormat::NEOX_2 || file_format==FileFormat::NEOX_4) && !IsNanCheck(logits[0]))
         {
             //run the black magic eval to determine if it's redpajama. VERY UGLY HACK!
             std::vector<int> test_embd = ::gpt_tokenize(vocab, "1 2 3 4 5 6 7");
-            stablelm_eval(neox_ctx, params.n_threads, 0, test_embd, logits, mem_per_token, FileFormat::NEOX_3);
+            stablelm_eval(neox_ctx, params.n_threads, 0, test_embd, logits, mem_per_token, (file_format==FileFormat::NEOX_2?FileFormat::NEOX_3:FileFormat::NEOX_5));
             int topid = std::max_element(logits.begin(),logits.end())-logits.begin();
             std::string predicted = vocab.id_to_token[topid].c_str();
             if(predicted.find("8") != std::string::npos)
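The check above hinges on a behavioral probe rather than a header field: the model is fed the prompt "1 2 3 4 5 6 7" under the alternate NeoX variant, and that variant is accepted only if the top logit decodes to a token containing "8". A self-contained sketch of just the decision step; the logits and vocabulary here are toy stand-ins for a real stablelm_eval call:

#include <algorithm>
#include <string>
#include <vector>

// Given the logits from evaluating "1 2 3 4 5 6 7" and an id->token
// table, report whether the argmax token contains "8".
bool predicts_eight(const std::vector<float> &logits,
                    const std::vector<std::string> &id_to_token)
{
    const int topid = std::max_element(logits.begin(), logits.end()) - logits.begin();
    return id_to_token[topid].find("8") != std::string::npos;
}

int main()
{
    // Toy vocabulary and logits: token 2 (" 8") wins, so the probe passes.
    const std::vector<std::string> vocab = { " 6", " 7", " 8", " 9" };
    const std::vector<float> logits = { 0.1f, 0.2f, 0.9f, 0.3f };
    return predicts_eight(logits, vocab) ? 0 : 1;
}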
@@ -407,6 +414,9 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         }
         else
         {
+            //newer format has bit unshuffling
+            SetQuantsUnshuffled(file_format == FileFormat::GPTJ_4);
+
             ModelLoadResult loadresult = gptj_model_load(params.model, gptj_ctx_v2, vocab);
             if (loadresult == ModelLoadResult::FAIL)
             {
@@ -584,7 +594,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     {
         n_vocab = gptj_ctx_v1.hparams.n_vocab;
     }
-    else if(file_format == FileFormat::GPTJ_3)
+    else if(file_format == FileFormat::GPTJ_3 || file_format==FileFormat::GPTJ_4)
     {
         n_vocab = gptj_ctx_v2.hparams.n_vocab;
     }
@@ -592,11 +602,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     {
         n_vocab = gpt2_ctx_v1.hparams.n_vocab;
     }
-    else if(file_format == FileFormat::GPT2_2)
+    else if(file_format == FileFormat::GPT2_2 || file_format==FileFormat::GPT2_3)
     {
         n_vocab = gpt2_ctx_v2.hparams.n_vocab;
     }
-    else if(file_format == FileFormat::NEOX_1 || file_format == FileFormat::NEOX_2 || file_format == FileFormat::NEOX_3)
+    else if(file_format == FileFormat::NEOX_1 || file_format == FileFormat::NEOX_2 || file_format == FileFormat::NEOX_3 || file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5)
     {
         n_vocab = neox_ctx.hparams.n_vocab;
     }
@@ -678,11 +688,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     {
         evalres = legacy_gpt2_eval(gpt2_ctx_v1, params.n_threads, n_past, embd, logits, mem_per_token, file_format);
     }
-    else if(file_format==FileFormat::GPT2_2)
+    else if(file_format==FileFormat::GPT2_2 || file_format==FileFormat::GPT2_3)
     {
         evalres = gpt2_eval(gpt2_ctx_v2, params.n_threads, n_past, embd, logits, mem_per_token, file_format);
     }
-    else if(file_format==FileFormat::NEOX_1 || file_format == FileFormat::NEOX_2 || file_format == FileFormat::NEOX_3)
+    else if(file_format==FileFormat::NEOX_1 || file_format == FileFormat::NEOX_2 || file_format == FileFormat::NEOX_3 || file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5)
     {
         evalres = stablelm_eval(neox_ctx, params.n_threads, n_past, embd, logits, mem_per_token, file_format);
     }
@@ -750,9 +760,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
         // set the logit of the eos token (2) to zero to avoid sampling it
         if ((file_format == FileFormat::GPT2_1 ||
             file_format == FileFormat::GPT2_2 ||
+            file_format == FileFormat::GPT2_3 ||
             file_format == FileFormat::GPTJ_1 ||
             file_format == FileFormat::GPTJ_2 ||
-            file_format == FileFormat::GPTJ_3) &&
+            file_format == FileFormat::GPTJ_3 ||
+            file_format == FileFormat::GPTJ_4) &&
             logits.size() > 50256)
         {
             logits[50256] = (logits[50256] < 0 ? logits[50256] : 0);
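Each branch above flips SetQuantsUnshuffled() before loading, because the *_3/*_4/*_5 "unshuffled" variants differ only in the bit layout of quantized blocks, not in model structure. A hedged sketch of what such a process-wide toggle could look like; the flag name and the dequantizer split are assumptions for illustration, not koboldcpp's actual internals:

// Assumed shape of the toggle: one global consulted by the
// dequantization path whenever a quantized block is decoded.
static bool quants_unshuffled = false;

void SetQuantsUnshuffled(bool unshuffled)
{
    quants_unshuffled = unshuffled;
}

// Hypothetical consumer: pick which half of a packed byte to read,
// depending on whether the file uses the old interleaved layout.
float dequantize_sample(int packed)
{
    const int nibble = quants_unshuffled ? (packed & 0x0F)          // linear layout
                                         : ((packed >> 4) & 0x0F);  // interleaved layout
    return (float)(nibble - 8); // center the 4-bit value, as q4 formats do
}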
@@ -108,9 +108,15 @@ void print_tok_vec(std::vector<float> &embd)
             fin.read((char *)&temp, sizeof(temp)); //n_layer
             fin.read((char *)&temp, sizeof(temp)); //n_rot
             fin.read((char *)&temp, sizeof(temp)); //f16
-            if(temp!=0 && temp!=1)
+            const int32_t qntvr = temp / 1000;
+            temp %= 1000;
+            if (qntvr != 0)
             {
-                fileformat = FileFormat::GPTJ_3; //quantized format cannot be legacy type
+                fileformat = FileFormat::GPTJ_4;
+            }
+            else if (temp != 0 && temp != 1)
+            {
+                fileformat = FileFormat::GPTJ_3; //quantized format cannot be legacy type
             }
         }
         else if(vocabsiz==50257)
@@ -122,15 +128,34 @@ void print_tok_vec(std::vector<float> &embd)
             fin.read((char *)&temp, sizeof(temp)); //n_head
             fin.read((char *)&temp, sizeof(temp)); //n_layer
             fin.read((char *)&temp, sizeof(temp)); //f16
-            if(temp!=0 && temp!=1)
+            const int32_t qntvr = temp / 1000;
+            temp %= 1000;
+            if (qntvr != 0)
             {
-                fileformat = FileFormat::GPT2_2; //quantized format cannot be legacy type
+                fileformat = FileFormat::GPT2_3;
             }
+            else if (temp != 0 && temp != 1)
+            {
+                fileformat = FileFormat::GPT2_2; //quantized format cannot be legacy type
+            }
         }
         else if(vocabsiz < 31998 || vocabsiz > 33000)
         {
             //anything outside the llama v1 range is assumed to be NeoX
-            fileformat = FileFormat::NEOX_2;
+            fileformat = FileFormat::NEOX_4;
+            uint32_t temp;
+            fin.read((char *)&temp, sizeof(temp)); //ctx
+            fin.read((char *)&temp, sizeof(temp)); //n_embd
+            fin.read((char *)&temp, sizeof(temp)); //n_head
+            fin.read((char *)&temp, sizeof(temp)); //n_layer
+            fin.read((char *)&temp, sizeof(temp)); //n_rot
+            fin.read((char *)&temp, sizeof(temp)); //f16
+            const int32_t qntvr = temp / 1000;
+            temp %= 1000;
+            if(qntvr==0)
+            {
+                fileformat = FileFormat::NEOX_2;
+            }
         }
     }
     else if(magic == 0x67676d66) //v2 format ggmf
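The detection above works purely from the v1 ggml header: the vocabulary size picks the model family, and the new qntvr digit (the f16 field divided by 1000) separates the unshuffled variants from the legacy ones. A compact sketch of the vocabulary-size dispatch; the 50400 GPT-J branch sits just above the excerpted hunk, so that value is an assumption here:

#include <cstdint>

enum class Family { GPTJ, GPT2, NEOX, LLAMA };

// Mirror of the header heuristic: exact vocab sizes identify GPT-J and
// GPT-2, and anything outside the llama v1 range is assumed to be NeoX.
Family classify_by_vocab(int32_t vocabsiz)
{
    if (vocabsiz == 50400) return Family::GPTJ;  // assumed branch, cut off above
    if (vocabsiz == 50257) return Family::GPT2;
    if (vocabsiz < 31998 || vocabsiz > 33000) return Family::NEOX;
    return Family::LLAMA;
}

int main()
{
    return (classify_by_vocab(50257) == Family::GPT2) ? 0 : 1;
}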
@@ -19,20 +19,24 @@ enum FileFormat
     GGML=1, // 1=(original llama ggml, alpaca, GPT4ALL, GPTJ header)
     GGHF=2, // 2=(llama ggmf)
     GGJT=3, // 3=(llama ggjt)
-    GGJT_2=4, //newer llama format
+    GGJT_2=4, //newer llama format unshuffled

     GPTJ_1=100, //the very first super old GPTJ format
     GPTJ_2=101, //pygmalion, uses old ggml lib
     GPTJ_3=102, //uses new ggml lib
+    GPTJ_4=103, //unshuffled

     GPT2_1=200,
     GPT2_2=201,
+    GPT2_3=202, //unshuffled

     RWKV_1=300,

     NEOX_1=400,
     NEOX_2=401,
-    NEOX_3=402,
+    NEOX_3=402, //redpajama
+    NEOX_4=403, //unshuffled
+    NEOX_5=404, //unshuffled redpajama
 };

 enum ModelLoadResult
@@ -50,6 +50,9 @@ ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, g
         fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer));
         fin.read((char *) &hparams.ftype, sizeof(hparams.ftype));

+        const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR;
+        hparams.ftype %= GGML_QNT_VERSION_FACTOR;
+
         printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
         printf("%s: n_ctx   = %d\n", __func__, hparams.n_ctx);
         printf("%s: n_embd  = %d\n", __func__, hparams.n_embd);
@@ -51,6 +51,9 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
         fin.read((char *) &hparams.n_rot, sizeof(hparams.n_rot));
         fin.read((char *) &hparams.ftype, sizeof(hparams.ftype));

+        const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR;
+        hparams.ftype %= GGML_QNT_VERSION_FACTOR;
+
         printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
         printf("%s: n_ctx   = %d\n", __func__, hparams.n_ctx);
         printf("%s: n_embd  = %d\n", __func__, hparams.n_embd);
@@ -49,6 +49,9 @@ ModelLoadResult stablelm_model_load(const std::string & fname, stablelm_model &
         fin.read((char *) &hparams.n_rot, sizeof(hparams.n_rot));
         fin.read((char *) &hparams.ftype, sizeof(hparams.ftype));

+        const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR;
+        hparams.ftype %= GGML_QNT_VERSION_FACTOR;
+
         printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
         printf("%s: n_ctx   = %d\n", __func__, hparams.n_ctx);
         printf("%s: n_embd  = %d\n", __func__, hparams.n_embd);