Fix falcon punctuation regex
This commit is contained in:
parent
0794b77714
commit
51e933a962
1 changed file with 1 addition and 1 deletion
|
@@ -12315,7 +12315,7 @@ struct llm_tokenizer_bpe {
|
|||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_FALCON:
|
||||
regex_exprs = {
|
||||
"[\\p{P}\\$\\+<=>\\^~\\|]+",
|
||||
"[\\p{P}\\$\\+<=>\\^~\\|`]+",
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||
"[0-9][0-9][0-9]",
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue