Add BPE models for testing
This commit is contained in:
parent
614d0bb874
commit
6168399112
1 changed files with 13 additions and 5 deletions
|
@ -156,7 +156,10 @@ def generator_custom_text_edge_cases() -> Iterator[str]:
|
||||||
'<s>a', # Phi-3 fail
|
'<s>a', # Phi-3 fail
|
||||||
'<unk><|endoftext|><s>', # Phi-3 fail
|
'<unk><|endoftext|><s>', # Phi-3 fail
|
||||||
'a\na', # TODO: Bert fail
|
'a\na', # TODO: Bert fail
|
||||||
|
'"`', # falcon
|
||||||
|
' República', # deepseek-coder, not inserted in vocab.special_tokens_cache because len==1
|
||||||
'a\xa0\xa0\x00b', # jina-v2-es
|
'a\xa0\xa0\x00b', # jina-v2-es
|
||||||
|
'one <mask>', # jina-v2-es <mask> lstrip=true
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -342,8 +345,13 @@ if __name__ == "__main__":
|
||||||
# "jina-v2-en", # WPM
|
# "jina-v2-en", # WPM
|
||||||
"gpt-2", # BPE
|
"gpt-2", # BPE
|
||||||
"llama-bpe", # BPE
|
"llama-bpe", # BPE
|
||||||
|
"falcon", # BPE
|
||||||
|
"deepseek-coder", # BPE
|
||||||
|
"deepseek-llm", # BPE
|
||||||
|
"starcoder", # BPE
|
||||||
"jina-v2-es", # BPE
|
"jina-v2-es", # BPE
|
||||||
"jina-v2-de", # BPE
|
"jina-v2-de", # BPE
|
||||||
|
"smaug-bpe", # BPE
|
||||||
"phi-2", # BPE
|
"phi-2", # BPE
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue