From a0c870db8590a229a3fbe2813d29b54f60d9c68f Mon Sep 17 00:00:00 2001
From: jaime-m-p <>
Date: Mon, 29 Apr 2024 11:09:52 +0200
Subject: [PATCH] Fix merge

---
 unicode.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unicode.cpp b/unicode.cpp
index 8e6bd738b..1c3c2b2c6 100644
--- a/unicode.cpp
+++ b/unicode.cpp
@@ -562,7 +562,7 @@ static std::vector unicode_regex_split_custom(const std::string & text,
 
     if (regex_expr == "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)") {
         bpe_offsets = unicode_regex_split_custom_gpt2(text, offsets);
-    } else if (regex == "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+") {
+    } else if (regex_expr == "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+") {
         bpe_offsets = unicode_regex_split_custom_llama3(text, offsets);
     }
 