llama : add BPE pre-tokenization for Qwen2 (#7114)

* Add BPE pre-tokenization for Qwen2. * minor : fixes --------- Co-authored-by: Ren Xuancheng <17811943+jklj077@users.noreply.github.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-05-08 20:06:43 +08:00 · 2024-05-08 20:06:43 +08:00 · 229ffff872
commit 229ffff872
parent 1fd9c1741d
8 changed files with 167 additions and 2 deletions
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@ -314,6 +314,9 @@ class Model(ABC):
        if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
            # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
            res = "command-r"
+        if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
+            # ref: https://huggingface.co/Qwen/Qwen1.5-7B
+            res = "qwen2"
        if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
            # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
            res = "olmo"