diff --git a/src/llama.cpp b/src/llama.cpp index c4635b110..e0a9ad534 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -5533,6 +5533,7 @@ static void llm_load_vocab( } else if ( tokenizer_pre == "smollm") { vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMOLLM; + vocab.tokenizer_clean_spaces = false; } else { throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str())); }