llama : use new pre-tokenizer type

This commit is contained in:
Georgi Gerganov 2024-04-26 20:08:28 +03:00
parent 9b4d63ae53
commit 43e12ce8e5
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
12 changed files with 87 additions and 44 deletions

View file

@@ -398,6 +398,9 @@ class Model(ABC):
if chkhsh == -3290901550109860290:
# ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer.json
res = "llama3"
if chkhsh == 5332289095291046364:
# ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-chat/blob/main/tokenizer.json
res = "deepseek-llm"
if chkhsh == 4190561703949727616:
# ref: https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct/blob/main/tokenizer.json
res = "deepseek-coder"