From 4a37251a186a1a35ac3ddd3ec496b40b502f9fde Mon Sep 17 00:00:00 2001 From: Jan Ploski Date: Fri, 16 Jun 2023 19:36:27 +0200 Subject: [PATCH 1/2] Fixed bos/eos token (which is both 11 according to config.json of Falcon-7B/40B). Also: do not auto-insert a space or (b|e)os at the beginning of prompt (seems to be LLaMA-specific). --- examples/falcon/falcon_main.cpp | 6 ++---- libfalcon.cpp | 10 ++-------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/examples/falcon/falcon_main.cpp b/examples/falcon/falcon_main.cpp index efd677905..a20bb655a 100644 --- a/examples/falcon/falcon_main.cpp +++ b/examples/falcon/falcon_main.cpp @@ -175,10 +175,8 @@ int main(int argc, char ** argv) { std::vector embd_inp; if (params.interactive_first || params.instruct || !params.prompt.empty() || session_tokens.empty()) { - // Add a space in front of the first character to match OG llama tokenizer behavior - params.prompt.insert(0, 1, ' '); - - embd_inp = ::falcon_tokenize(ctx, params.prompt, true); + // Falcon does not have a dedicated bos token (bos==eos), so don't inject it here + embd_inp = ::falcon_tokenize(ctx, params.prompt, false); } else { embd_inp = session_tokens; } diff --git a/libfalcon.cpp b/libfalcon.cpp index 227c09076..dcf627ec8 100644 --- a/libfalcon.cpp +++ b/libfalcon.cpp @@ -1348,12 +1348,6 @@ static bool falcon_eval_internal( const int n_threads, const char * cgraph_fname) { - // enforce that the first token is BOS - if (n_past == 0 && tokens[0] != falcon_token_bos()) { - fprintf(stderr, "%s: first token must be BOS\n", __func__); - return false; - } - const int64_t t_start_us = ggml_time_us(); const int N = n_tokens; @@ -3389,11 +3383,11 @@ const char * falcon_token_to_str(const struct falcon_context * ctx, llama_token } llama_token falcon_token_bos() { - return 1; + return 11; } llama_token falcon_token_eos() { - return 2; + return 11; } llama_token falcon_token_nl() { From 3d6ed185420204d2c41bce1a34a134545ef75bfb Mon Sep 17 00:00:00 2001 From: Jan 
Ploski Date: Fri, 16 Jun 2023 19:53:38 +0200 Subject: [PATCH 2/2] Correction to 4a37251a - since we did not insert the bos token, there is no need to attempt to rescue it during context swap --- examples/falcon/falcon_main.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/falcon/falcon_main.cpp b/examples/falcon/falcon_main.cpp index a20bb655a..f0415a65b 100644 --- a/examples/falcon/falcon_main.cpp +++ b/examples/falcon/falcon_main.cpp @@ -359,9 +359,6 @@ int main(int argc, char ** argv) { if (n_past + (int) embd.size() > n_ctx) { const int n_left = n_past - params.n_keep; - // always keep the first token - BOS - n_past = std::max(1, params.n_keep); - // insert n_left/2 tokens at the start of embd from last_n_tokens embd.insert(embd.begin(), last_n_tokens.begin() + n_ctx - n_left/2 - embd.size(), last_n_tokens.end() - embd.size());