diff --git a/src/llama.cpp b/src/llama.cpp index e0a9ad534..fbcb93e2f 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -15558,6 +15558,7 @@ struct llm_tokenizer_bpe { case LLAMA_VOCAB_PRE_TYPE_STARCODER: case LLAMA_VOCAB_PRE_TYPE_REFACT: case LLAMA_VOCAB_PRE_TYPE_COMMAND_R: + case LLAMA_VOCAB_PRE_TYPE_SMOLLM: regex_exprs = { "\\p{N}", "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",