Add EXAONE pre-tokenizer to llama-vocab.cpp
Co-Authored-By: compilade <113953597+compilade@users.noreply.github.com>
This commit is contained in:
parent
98ad475fbe
commit
4c401e510f
1 changed file with 1 addition and 0 deletions
|
@@ -388,6 +388,7 @@ struct llm_tokenizer_bpe {
|
||||||
case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
|
case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
|
||||||
case LLAMA_VOCAB_PRE_TYPE_SMOLLM:
|
case LLAMA_VOCAB_PRE_TYPE_SMOLLM:
|
||||||
case LLAMA_VOCAB_PRE_TYPE_CODESHELL:
|
case LLAMA_VOCAB_PRE_TYPE_CODESHELL:
|
||||||
|
case LLAMA_VOCAB_PRE_TYPE_EXAONE:
|
||||||
regex_exprs = {
|
regex_exprs = {
|
||||||
"\\p{N}",
|
"\\p{N}",
|
||||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue