added support for gpt4all original format
This commit is contained in:
parent 085a9f90a7
commit b1f08813e3

10 changed files with 38 additions and 13 deletions
Makefile (15 changes)

@@ -238,8 +238,8 @@ ggml.o: ggml.c ggml.h
 ggml_blas.o: ggml.c ggml.h
 	$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_blas.o
 
-ggml_old_v1.o: otherarch/ggml_old.c otherarch/ggml_old.h
-	$(CC) $(CFLAGS) -c otherarch/ggml_old.c -o ggml_old_v1.o
+ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
+	$(CC) $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o
 
 llama.o: llama.cpp llama.h
 	$(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o
@@ -256,8 +256,8 @@ expose.o: expose.cpp expose.h
 llama_adapter.o:
 	$(CXX) $(CXXFLAGS) -c llama_adapter.cpp -o llama_adapter.o
 
-gptj_adapter.o: ggml_old_v1.o
-	$(CXX) $(CXXFLAGS) otherarch/gptj_old.cpp otherarch/utils.cpp ggml_old_v1.o gptj_adapter.cpp -o gptj_adapter.o
+gptj_adapter.o: ggml.o
+	$(CXX) $(CXXFLAGS) otherarch/gptj.cpp otherarch/utils.cpp ggml.o gptj_adapter.cpp -o gptj_adapter.o
 
 clean:
 	rm -vf *.o main quantize perplexity embedding main.exe quantize.exe llamacpp.dll llamacpp_blas.dll gpt2.exe gptj.exe
@@ -268,8 +268,11 @@ main: examples/main/main.cpp ggml.o llama.o common.o
 	@echo '==== Run ./main -h for help. ===='
 	@echo
 
-gptj: ggml_old_v1.o
-	$(CXX) $(CXXFLAGS) otherarch/gptj_old.cpp otherarch/utils.cpp ggml_old_v1.o -o gptj $(LDFLAGS)
+gptj: ggml.o
+	$(CXX) $(CXXFLAGS) otherarch/gptj.cpp otherarch/utils.cpp ggml.o -o gptj $(LDFLAGS)
+
+gptjold: ggml_v1.o
+	$(CXX) $(CXXFLAGS) otherarch/gptj_old.cpp otherarch/utils.cpp ggml_v1.o -o gptj $(LDFLAGS)
 
 
 llamalib: ggml.o expose.o llama_adapter.o llamaextra.o common.o

@@ -35,7 +35,7 @@ bool gptj_load_model(const load_model_inputs inputs, FileFormat in_file_format)
     n_batch = params.n_batch = inputs.batch_size;
     modelname = params.model = inputs.model_filename;
 
-    if (!legacy_gptj_model_load(params.model, model, vocab)) {
+    if (!gptj_model_load(params.model, model, vocab)) {
         fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
         return false;
     }

@@ -151,7 +151,7 @@ generation_outputs gptj_generate(const generation_inputs inputs, generation_outp
             printf("\rGenerating (%d / %d tokens)", (1 + params.n_predict - remaining_tokens), params.n_predict);
         }
 
-        if (!legacy_gptj_eval(model, params.n_threads, n_past, embd, logits, mem_per_token))
+        if (!gptj_eval(model, params.n_threads, n_past, embd, logits, mem_per_token))
        {
            fprintf(stderr, "Failed to predict\n");
            snprintf(output.text, sizeof(output.text), "%s", "");
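Note: the adapter above takes a FileFormat argument, and the header diff further below keeps both the legacy_ and current entry points exported, so a caller can route each file to the loader generation that matches it. A minimal sketch of that dispatch, assuming illustrative enum names (FORMAT_GPTJ_LEGACY / FORMAT_GPTJ are not the repository's identifiers):

// Hypothetical sketch: selecting a loader from a FileFormat tag.
#include <string>

enum FileFormat { FORMAT_GPTJ_LEGACY, FORMAT_GPTJ };

// Declarations mirroring otherarch.h (see the header diff below).
struct gptj_model;
struct gpt_vocab;
bool legacy_gptj_model_load(const std::string &fname, gptj_model &model, gpt_vocab &vocab);
bool gptj_model_load(const std::string &fname, gptj_model &model, gpt_vocab &vocab);

bool load_for_format(FileFormat fmt, const std::string &fname,
                     gptj_model &model, gpt_vocab &vocab) {
    // Old ggml_v1-era files go through the retained legacy loader;
    // everything else takes the current-ggml path this commit switches to.
    return (fmt == FORMAT_GPTJ_LEGACY)
               ? legacy_gptj_model_load(fname, model, vocab)
               : gptj_model_load(fname, model, vocab);
}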
llamacpp.dll (binary file not shown)

@@ -272,7 +272,13 @@ void print_tok_vec(std::vector<int> &embd)
     vocab.id_to_token.resize(model.hparams.n_vocab);
     std::vector<char> tmp(64);
 
-    for (int i = 0; i < model.hparams.n_vocab; i++) {
+    int32_t vocabloops = model.hparams.n_vocab;
+    if(vocabloops==32001 && legacy_file_format)
+    {
+        printf("---\n!! WARNING: Model appears to be GPT4ALL v1 model, triggering compatibility fix !!\n---\n");
+        vocabloops -= 1;
+    }
+    for (int i = 0; i < vocabloops; i++) {
         uint32_t len;
         fin.read((char *) &len, sizeof(len));
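For context: the fix appears to compensate for GPT4ALL v1 files that declare an n_vocab of 32001 while serializing one fewer token string, so looping over the declared count would desynchronize the stream for every field read afterwards. A self-contained sketch of the same adjustment in isolation (read_vocab and the exact length-prefixed layout are assumptions for illustration, not the repository's API):

#include <cstdint>
#include <cstdio>
#include <fstream>
#include <string>
#include <vector>

std::vector<std::string> read_vocab(std::ifstream &fin, int32_t n_vocab,
                                    bool legacy_file_format) {
    int32_t vocabloops = n_vocab;
    // Assumed: GPT4ALL v1 files declare 32001 entries but store only 32000,
    // so reading the declared count would shift every later read off by one record.
    if (vocabloops == 32001 && legacy_file_format) {
        vocabloops -= 1;
    }
    std::vector<std::string> tokens;
    tokens.reserve(vocabloops);
    for (int32_t i = 0; i < vocabloops; i++) {
        uint32_t len = 0;
        fin.read(reinterpret_cast<char *>(&len), sizeof(len)); // length prefix
        std::string word(len, '\0');
        fin.read(&word[0], len);                               // raw token bytes
        tokens.push_back(word);
    }
    return tokens;
}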
@@ -1,4 +1,4 @@
-#include "ggml_old.h"
+#include "ggml_v1.h"
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
@@ -14,7 +14,19 @@
 #include <iostream>
 #include <unistd.h>
 
 
+bool should_transpose_layer(std::string name)
+{
+    if(name.find(".mlp.fc_in.weight")!=std::string::npos ||
+    name.find(".attn.out_proj.weight")!=std::string::npos ||
+    name.find(".attn.q_proj.weight")!=std::string::npos ||
+    name.find(".attn.k_proj.weight")!=std::string::npos ||
+    name.find(".attn.v_proj.weight")!=std::string::npos)
+    {
+        return true;
+    }
+    return false;
+}
 
 // load the model's weights from a file
 bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab & vocab) {
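The new helper flags the GPT-J projection matrices that the original GPT4ALL export stores transposed relative to what the current ggml graph expects. A sketch of how a loader might consult it while pulling tensors in (the in-memory transpose and maybe_transpose are assumptions for illustration; this diff does not show how the commit actually handles flagged tensors):

#include <cstddef>
#include <string>
#include <vector>

// Mirrors the function added in the hunk above.
bool should_transpose_layer(std::string name)
{
    return name.find(".mlp.fc_in.weight")     != std::string::npos ||
           name.find(".attn.out_proj.weight") != std::string::npos ||
           name.find(".attn.q_proj.weight")   != std::string::npos ||
           name.find(".attn.k_proj.weight")   != std::string::npos ||
           name.find(".attn.v_proj.weight")   != std::string::npos;
}

// Hypothetical: transpose a row-major (rows x cols) buffer for flagged weights.
std::vector<float> maybe_transpose(const std::string &name,
                                   const std::vector<float> &data,
                                   int rows, int cols)
{
    if (!should_transpose_layer(name)) return data;
    std::vector<float> out(data.size());
    for (int r = 0; r < rows; r++)
        for (int c = 0; c < cols; c++)
            out[(size_t)c * rows + r] = data[(size_t)r * cols + c];
    return out;
}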
@@ -139,6 +151,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
 
         ctx_size += (5 + 10*n_layer)*256; // object overhead
 
+        ctx_size = ctx_size * 3 / 2;
         printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
     }
 
@@ -279,6 +292,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
             fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
             return false;
         }
 
+
         if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
             fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",

@@ -312,7 +326,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
         }
 
         fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
 
         //printf("%42s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
         total_size += ggml_nbytes(tensor);
         if (++n_tensors % 8 == 0) {
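The loader around these hunks walks one length-prefixed record per tensor: metadata, then the name, then the payload read with fin.read into tensor->data. A sketch of that record layout, assuming the classic ggml ordering of fields (the widths and order here are inferred from the visible reads and the commented printf, not stated by this diff):

#include <cstdint>
#include <fstream>
#include <string>

struct tensor_header {
    int32_t n_dims = 0;
    int32_t ftype  = 0;      // 0 = f32, 1 = f16, as hinted by the commented printf
    int32_t ne[2]  = {1, 1}; // per-dimension extents, matching the ne[0]/ne[1] checks
    std::string name;
};

// Hypothetical reader for one tensor record's metadata; the payload of
// ggml_nbytes(tensor) bytes would follow immediately after the name.
bool read_tensor_header(std::ifstream &fin, tensor_header &h) {
    int32_t name_len = 0;
    fin.read(reinterpret_cast<char *>(&h.n_dims), sizeof(h.n_dims));
    fin.read(reinterpret_cast<char *>(&name_len), sizeof(name_len));
    fin.read(reinterpret_cast<char *>(&h.ftype), sizeof(h.ftype));
    for (int i = 0; i < h.n_dims && i < 2; i++)
        fin.read(reinterpret_cast<char *>(&h.ne[i]), sizeof(h.ne[i]));
    h.name.resize(name_len);
    fin.read(&h.name[0], name_len);
    return bool(fin);
}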
@@ -1,4 +1,4 @@
-#include "ggml_old.h"
+#include "ggml_v1.h"
 #include "otherarch.h"
 
 #include "utils.h"
@@ -72,3 +72,5 @@ struct gptj_model {
 
 bool legacy_gptj_model_load(const std::string &fname, gptj_model &model, gpt_vocab &vocab);
 bool legacy_gptj_eval(const gptj_model &model, const int n_threads, const int n_past, const std::vector<gpt_vocab::id> &embd_inp, std::vector<float> &embd_w, size_t &mem_per_token);
+bool gptj_model_load(const std::string &fname, gptj_model &model, gpt_vocab &vocab);
+bool gptj_eval(const gptj_model &model, const int n_threads, const int n_past, const std::vector<gpt_vocab::id> &embd_inp, std::vector<float> &embd_w, size_t &mem_per_token);
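With the new entry points declared alongside the legacy ones, a minimal caller loads a model and runs one evaluation step. A sketch against the signatures above (the internals of gptj_model / gpt_vocab, the placeholder prompt token, and the thread count are assumptions for illustration):

#include <cstdio>
#include <string>
#include <vector>

#include "otherarch.h" // gptj_model, gpt_vocab, gptj_model_load, gptj_eval

int main(int argc, char **argv) {
    if (argc < 2) { fprintf(stderr, "usage: %s model.bin\n", argv[0]); return 1; }

    gptj_model model;
    gpt_vocab vocab;
    if (!gptj_model_load(argv[1], model, vocab)) return 1;

    std::vector<gpt_vocab::id> embd = {0}; // placeholder prompt token
    std::vector<float> logits;
    size_t mem_per_token = 0;

    // One evaluation step; n_threads=4 and n_past=0 are arbitrary for the sketch.
    if (!gptj_eval(model, 4, 0, embd, logits, mem_per_token)) {
        fprintf(stderr, "Failed to predict\n");
        return 1;
    }
    printf("got %zu logits\n", logits.size());
    return 0;
}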