add support to load tokenizer.model from command line argument
parent 7438b83939
commit 6b9e424671

3 changed files with 14 additions and 36 deletions
main.cpp (13 changes)

@@ -776,14 +776,15 @@ int main(int argc, char ** argv) {
     gpt_params params;
 
     params.model = "models/llama-7B/ggml-model.bin";
+    params.tokenizer = "models/tokenizer.model";
-    sentencepiece::SentencePieceProcessor sp;
-    sp.Load("./models/tokenizer.model");
 
     if (gpt_params_parse(argc, argv, params) == false) {
         return 1;
     }
 
+    sentencepiece::SentencePieceProcessor sp;
+    sp.Load(params.tokenizer);
 
     if (params.seed < 0) {
         params.seed = time(NULL);
     }
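In the hunk above, sp.Load(params.tokenizer) discards the status object that SentencePiece returns, so a missing or unreadable tokenizer.model only shows up later as empty tokenizations. A minimal sketch of a guarded load, assuming a hypothetical helper name (load_tokenizer) that is not part of this commit:

#include <cstdio>
#include <string>
#include <sentencepiece_processor.h>

// Sketch only: load the tokenizer and report failures instead of ignoring them.
static bool load_tokenizer(sentencepiece::SentencePieceProcessor & sp, const std::string & path) {
    const auto status = sp.Load(path); // returns sentencepiece::util::Status
    if (!status.ok()) {
        fprintf(stderr, "%s: failed to load tokenizer '%s': %s\n",
                __func__, path.c_str(), status.ToString().c_str());
        return false;
    }
    return true;
}

With such a helper, main() could bail out right after gpt_params_parse() when the path is wrong, mirroring how the existing return 1 path handles bad arguments.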
@@ -823,12 +824,12 @@ int main(int argc, char ** argv) {
     std::vector<float> logits;
 
     // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp = ::llama_tokenize(vocab, params.prompt, true);
+    std::vector<gpt_vocab::id> embd_inp = ::llama_tokenize(sp, vocab, params.prompt, true);
 
     params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
 
     // tokenize the reverse prompt
-    std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false);
+    std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize(sp, vocab, params.antiprompt, false);
 
     printf("\n");
     printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str());

@@ -999,7 +1000,7 @@ int main(int argc, char ** argv) {
                 buf[n_read+1] = 0;
             }
 
-            std::vector<gpt_vocab::id> line_inp = ::llama_tokenize(vocab, buf, false);
+            std::vector<gpt_vocab::id> line_inp = ::llama_tokenize(sp, vocab, buf, false);
             embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
 
             remaining_tokens -= line_inp.size();
utils.cpp (34 changes)

@@ -51,6 +51,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.n_batch = std::stoi(argv[++i]);
         } else if (arg == "-m" || arg == "--model") {
             params.model = argv[++i];
+        } else if (arg == "--tokenizer") {
+            params.tokenizer = argv[++i];
         } else if (arg == "-i" || arg == "--interactive") {
             params.interactive = true;
         } else if (arg == "--interactive-start") {
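The new branch lets the tokenizer be chosen at run time, for example: main -m models/llama-7B/ggml-model.bin --tokenizer models/tokenizer.model. It follows the existing argv[++i] pattern, which indexes one past the current argument and reads past the end of argv when a flag is given without a value. A bounds-checked variant, purely illustrative and not part of this commit (next_arg is a hypothetical helper):

#include <cstdio>
#include <string>

// Sketch only: fetch the value that follows a flag, failing cleanly if it is missing.
static bool next_arg(int argc, char ** argv, int & i, std::string & out) {
    if (i + 1 >= argc) {
        fprintf(stderr, "error: missing value for argument '%s'\n", argv[i]);
        return false;
    }
    out = argv[++i];
    return true;
}

// Hypothetical use inside gpt_params_parse:
//     } else if (arg == "--tokenizer") {
//         if (!next_arg(argc, argv, i, params.tokenizer)) return false;
//     }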
@@ -98,6 +100,8 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params) {
     fprintf(stderr, "  -b N, --batch_size N  batch size for prompt processing (default: %d)\n", params.n_batch);
     fprintf(stderr, "  -m FNAME, --model FNAME\n");
     fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
+    fprintf(stderr, "  --tokenizer FNAME\n");
+    fprintf(stderr, "                        tokenizer path (default: %s)\n", params.tokenizer.c_str());
     fprintf(stderr, "\n");
 }
 
@@ -274,39 +278,11 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text) {
     return tokens;
 }
 
-std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos) {
-    //auto res = gpt_tokenize(vocab, text);
-
-    //if (bos) {
-    //    res.insert(res.begin(), 1); // TODO: replace with vocab.bos
-    //}
-
+std::vector<gpt_vocab::id> llama_tokenize(sentencepiece::SentencePieceProcessor & sp, const gpt_vocab & vocab, const std::string & text, bool bos) {
     std::vector<gpt_vocab::id> res;
-
-    // if (bos) {
-    //     res.push_back(1); // TODO: replace with vocab.bos
-    // }
-
-    sentencepiece::SentencePieceProcessor sp;
-    sp.Load("./models/tokenizer.model");
 
     std::vector<std::string> pieces;
     return sp.EncodeAsIds(text);
-    /*
-    for (const auto & piece : pieces) {
-        printf("piece: %s\n", piece.c_str());
-        if (vocab.token_to_id.count(piece) > 0) {
-            res.push_back(vocab.token_to_id.at(piece));
-        } else {
-            // handle unknown token
-        }
-    }
-
-    for (const auto& id : res) {
-        printf("%d\n", id);
-    }
-
-    return res;*/
 }
 
 bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
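As rewritten, llama_tokenize no longer uses its vocab or bos parameters: it returns sp.EncodeAsIds(text) directly, and the commented-out BOS handling is removed rather than replaced. A minimal sketch of one way the bos flag could still be honoured, using the id stored in the tokenizer model instead of a hard-coded 1 (llama_tokenize_sketch is an illustrative name, not code from this commit):

#include <string>
#include <vector>
#include <sentencepiece_processor.h>

// Sketch only: encode with SentencePiece and optionally prepend the BOS id.
std::vector<int> llama_tokenize_sketch(sentencepiece::SentencePieceProcessor & sp,
                                       const std::string & text, bool bos) {
    std::vector<int> res = sp.EncodeAsIds(text);
    if (bos) {
        res.insert(res.begin(), sp.bos_id()); // 1 for the LLaMA tokenizer.model
    }
    return res;
}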
utils.h (3 changes)

@@ -29,6 +29,7 @@ struct gpt_params {
     int32_t n_batch = 8; // batch size for prompt processing
 
     std::string model = "models/lamma-7B/ggml-model.bin"; // model path
+    std::string tokenizer = "models/tokenizer.model"; // tokenizer path
     std::string prompt;
 
     bool use_color = false; // use color to distinguish generations and inputs

@@ -75,7 +76,7 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text);
 
 // TODO: this is probably wrong, but I cannot figure out how this tokenizer works ..
 // ref: https://github.com/google/sentencepiece
-std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos);
+std::vector<gpt_vocab::id> llama_tokenize(sentencepiece::SentencePieceProcessor & sp, const gpt_vocab & vocab, const std::string & text, bool bos);
 
 // load the tokens from encoder.json
 bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);
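One prerequisite is implied but not visible in this diff: utils.h now names sentencepiece::SentencePieceProcessor in the llama_tokenize declaration, so the type has to be in scope wherever the header is included, and the binaries have to link against the SentencePiece library. Two ways the header could obtain the type, stated as assumptions about the surrounding build rather than as part of this commit:

// Option 1: include the library header in utils.h.
#include <sentencepiece_processor.h>

// Option 2: forward-declare the class; a reference parameter only needs the name.
namespace sentencepiece { class SentencePieceProcessor; }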