Dynamically determine the newline token from the vocabulary instead of hard-coding its ID

This commit is contained in:
rabidcopy 2023-03-20 14:02:48 -05:00 committed by GitHub
parent 330b86eed2
commit 3eca29ec0d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -30,7 +30,6 @@
#define ANSI_BOLD "\x1b[1m" #define ANSI_BOLD "\x1b[1m"
static const int EOS_TOKEN_ID = 2; static const int EOS_TOKEN_ID = 2;
static const int NEWLINE_TOKEN_ID = 13;
// determine number of model parts based on the dimension // determine number of model parts based on the dimension
static const std::map<int, int> LLAMA_N_PARTS = { static const std::map<int, int> LLAMA_N_PARTS = {
@ -177,6 +176,9 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
vocab.id_to_token[i] = word; vocab.id_to_token[i] = word;
vocab.score[i] = score; vocab.score[i] = score;
// dynamically determine the newline token
const auto NEWLINE_TOKEN_ID = vocab.token_to_id["\n"];
//if (i < 30000) { //if (i < 30000) {
// fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str()); // fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
//} //}