diff --git a/llama.cpp b/llama.cpp index 046449130..bb5cd9fe2 100644 --- a/llama.cpp +++ b/llama.cpp @@ -12315,7 +12315,7 @@ struct llm_tokenizer_bpe { break; case LLAMA_VOCAB_PRE_TYPE_FALCON: regex_exprs = { - "[\\p{P}\\$\\+<=>\\^~\\|]+", + "[\\p{P}\\$\\+<=>\\^~\\|`]+", "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)", "[0-9][0-9][0-9]", };