From 8f5e1e0c76f642c06d36d78c42032af2e2e662cc Mon Sep 17 00:00:00 2001
From: jaime-m-p <>
Date: Thu, 4 Jul 2024 22:30:48 +0200
Subject: [PATCH] 'viking' detokenizer clean spaces

---
 src/llama.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llama.cpp b/src/llama.cpp
index 4d3daaa94..5fce4816d 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5158,6 +5158,7 @@ static void llm_load_vocab(
             } else if (
                 tokenizer_pre == "viking") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_VIKING;
+                vocab.tokenizer_clean_spaces = false;
             } else if (
                 tokenizer_pre == "jais") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_JAIS;