From 689e38caccbe710169df866f9fd9d82aabcb3a13 Mon Sep 17 00:00:00 2001
From: Jason Stillerman
Date: Sun, 21 Jul 2024 02:49:10 -0400
Subject: [PATCH] Update src/llama.cpp

Co-authored-by: compilade
---
 src/llama.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llama.cpp b/src/llama.cpp
index c4635b110..e0a9ad534 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5533,6 +5533,7 @@ static void llm_load_vocab(
             } else if (
                 tokenizer_pre == "smollm") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMOLLM;
+                vocab.tokenizer_clean_spaces = false;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }