From 3eca29ec0dc3cd98713a68079939929291248138 Mon Sep 17 00:00:00 2001
From: rabidcopy
Date: Mon, 20 Mar 2023 14:02:48 -0500
Subject: [PATCH] dynamically determine newline token

Keep NEWLINE_TOKEN_ID at file scope so code outside the vocab loop can
still read it, and update it while the vocabulary is loaded instead of
hard-coding it. The previous value (13) stays as the fallback for
vocabularies that contain no "\n" token.

---
Review notes (text in this section is ignored by git am):

* The first version declared `const auto NEWLINE_TOKEN_ID` inside the
  vocab loop body. That local went out of scope at the end of every
  iteration and did not replace the removed file-scope constant.
* `vocab.token_to_id["\n"]` used operator[], which on a std::map-like
  container inserts a "\n" -> 0 entry when the key is not present yet.
  Comparing `word` directly avoids the lookup and the insertion.
* Leading whitespace of the context lines was reconstructed; if the
  hunks do not match, apply with `git apply --ignore-whitespace`.
* The post-image blob id on the `index` line predates this revision.

 main.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/main.cpp b/main.cpp
index 8b9c98ba8..7e8853296 100644
--- a/main.cpp
+++ b/main.cpp
@@ -30,6 +30,6 @@
 #define ANSI_BOLD "\x1b[1m"
 
 static const int EOS_TOKEN_ID = 2;
-static const int NEWLINE_TOKEN_ID = 13;
+static int NEWLINE_TOKEN_ID = 13; // fallback; replaced by the id of "\n" while the vocab is loaded
 
 // determine number of model parts based on the dimension
@@ -177,6 +177,11 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
             vocab.id_to_token[i] = word;
             vocab.score[i] = score;
 
+            // dynamically determine the newline token (the default is kept if the vocab has none)
+            if (word == "\n") {
+                NEWLINE_TOKEN_ID = i;
+            }
+
             //if (i < 30000) {
             //    fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
             //}