From 4155e86ba0f520f85c87da5b153c6dc06257cd46 Mon Sep 17 00:00:00 2001
From: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
Date: Tue, 7 May 2024 21:47:59 -0400
Subject: [PATCH] feat: Add support for qwen tokenizer

Signed-off-by: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
---
 convert-hf-to-gguf-update.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
index 4b101ad26..01197bd7f 100755
--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -119,13 +119,20 @@ for model in models:

     # model and repo urls are not the same
     # url = "https://huggingface.co/Qwen/Qwen-tokenizer/raw/main/tokenizer.json"
-
-    # Get the models tokenizer
-    download_file_with_auth(
-        url=f"{url_main}/tokenizer.json",
-        token=token,
-        save_path=model_tokenizer_path
-    )
+    if name == "qwen":  # qwen is an outlier and will raise a FileNotFoundError
+        # fetch the qwen tokenizer
+        download_file_with_auth(
+            url="https://huggingface.co/Qwen/Qwen-tokenizer/raw/main/tokenizer.json",
+            token=token,
+            save_path=model_tokenizer_path
+        )
+    else:  # Et tu, Brute?
+        # Get the models tokenizer
+        download_file_with_auth(
+            url=f"{url_main}/tokenizer.json",
+            token=token,
+            save_path=model_tokenizer_path
+        )

     # Get the models hyper params
     download_file_with_auth(