From f4600e653c37849e920e3d5abdbf0111d16e21ac Mon Sep 17 00:00:00 2001 From: Jason Stillerman Date: Sun, 21 Jul 2024 03:07:33 -0400 Subject: [PATCH] handle regex --- src/llama.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llama.cpp b/src/llama.cpp index e0a9ad534..fbcb93e2f 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -15558,6 +15558,7 @@ struct llm_tokenizer_bpe { case LLAMA_VOCAB_PRE_TYPE_STARCODER: case LLAMA_VOCAB_PRE_TYPE_REFACT: case LLAMA_VOCAB_PRE_TYPE_COMMAND_R: + case LLAMA_VOCAB_PRE_TYPE_SMOLLM: regex_exprs = { "\\p{N}", "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",