From 1fb5b55894359ef1ac88266157d209deb77cb370 Mon Sep 17 00:00:00 2001 From: Haoxiang Fei Date: Fri, 10 May 2024 18:18:01 +0800 Subject: [PATCH] fix: copy to fix fallthrough --- llama.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index af005c12c..152d15e56 100644 --- a/llama.cpp +++ b/llama.cpp @@ -12208,7 +12208,6 @@ struct llm_tokenizer_bpe { switch (vocab.type_pre) { case LLAMA_VOCAB_PRE_TYPE_LLAMA3: ignore_merges = true; - case LLAMA_VOCAB_PRE_TYPE_DBRX: word_collection = unicode_regex_split(text, { // original regex from tokenizer.json //"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", @@ -12217,6 +12216,12 @@ struct llm_tokenizer_bpe { "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", }); break; + case LLAMA_VOCAB_PRE_TYPE_DBRX: + word_collection = unicode_regex_split(text, { + // same as llama3 + "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", + }); + break; case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM: word_collection = unicode_regex_split(text, { "[\r\n]",