From 1e1e78a3246cdd23513f0b56795b6db9324bea18 Mon Sep 17 00:00:00 2001 From: nopperl <54780682+nopperl@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:46:18 +0000 Subject: [PATCH] Update src/llama.cpp Co-authored-by: compilade --- src/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index d19c5cf8f..db41ad629 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -15850,7 +15850,7 @@ struct llm_tokenizer_bpe { regex_exprs = { "", // Sentinel tokens "(IMGIMG)((A|B|C|D|E|F|G|H|I){1,4})Z", // Image tokens - "([\t\n]| | )", // directly from tokenizer.json + "([\\t\\n]| | )", // directly from tokenizer.json "\\p{N}", // Individual digits "[\\p{P}\\$\\+<=>\\^~\\|`]+", // Punctuation "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",