From 4155e86ba0f520f85c87da5b153c6dc06257cd46 Mon Sep 17 00:00:00 2001
From: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
Date: Tue, 7 May 2024 21:47:59 -0400
Subject: [PATCH] feat: Add support for qwen tokenizer

Signed-off-by: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
---
 convert-hf-to-gguf-update.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
index 4b101ad26..01197bd7f 100755
--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -119,13 +119,20 @@ for model in models:

     # model and repo urls are not the same
     # url = "https://huggingface.co/Qwen/Qwen-tokenizer/raw/main/tokenizer.json"
-
-    # Get the models tokenizer
-    download_file_with_auth(
-        url=f"{url_main}/tokenizer.json",
-        token=token,
-        save_path=model_tokenizer_path
-    )
+    if name == "qwen":  # qwen is an outlier and will raise a FileNotFoundError
+        # fetch the qwen tokenizer
+        download_file_with_auth(
+            url="https://huggingface.co/Qwen/Qwen-tokenizer/raw/main/tokenizer.json",
+            token=token,
+            save_path=model_tokenizer_path
+        )
+    else:  # Et tu, Brute?
+        # Get the models tokenizer
+        download_file_with_auth(
+            url=f"{url_main}/tokenizer.json",
+            token=token,
+            save_path=model_tokenizer_path
+        )

     # Get the models hyper params
     download_file_with_auth(