From 7566d1ab9b2f3b68a277ab66dede14b3846e313f Mon Sep 17 00:00:00 2001 From: Jakub Horak Date: Fri, 17 Mar 2023 18:08:03 +0100 Subject: [PATCH] Insert single space in front of the prompt - this is to match original llama tokenizer behavior --- main.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.cpp b/main.cpp index ca0fca8b3..39c5d7b76 100644 --- a/main.cpp +++ b/main.cpp @@ -845,6 +845,8 @@ int main(int argc, char ** argv) { std::vector<float> logits; + // Add a space in front of the first character to match OG llama tokenizer behavior + params.prompt.insert(0, 1, ' '); // tokenize the prompt std::vector<gpt_vocab::id> embd_inp = ::llama_tokenize(vocab, params.prompt, true);