Update src/llama.cpp

Co-authored-by: compilade <git@compilade.net>
This commit is contained in:
nopperl 2024-07-22 11:46:18 +00:00 committed by GitHub
parent 05f138551f
commit 1e1e78a324
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -15850,7 +15850,7 @@ struct llm_tokenizer_bpe {
regex_exprs = {
"<sentinel:[0-9]+>", // Sentinel tokens
"(IMGIMG)((A|B|C|D|E|F|G|H|I){1,4})Z", // Image tokens
"([\t\n]| | )", // directly from tokenizer.json
"([\\t\\n]| | )", // directly from tokenizer.json
"\\p{N}", // Individual digits
"[\\p{P}\\$\\+<=>\\^~\\|`]+", // Punctuation
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",