From d6fe269ced93d45783a3b37c3cb20554264e5578 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Mon, 8 Jul 2024 18:13:16 -0400
Subject: [PATCH] llama : fix command-r detokenization

---
 src/llama.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llama.cpp b/src/llama.cpp
index b652762d2..3509ff599 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5407,6 +5407,7 @@ static void llm_load_vocab(
             } else if (
                 tokenizer_pre == "command-r") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_COMMAND_R;
+                vocab.tokenizer_clean_spaces = false;
             } else if (
                 tokenizer_pre == "qwen2") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;