From de3d9e3b33264bf1e93466d91a8c35c0b4a7e9af Mon Sep 17 00:00:00 2001
From: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
Date: Wed, 8 May 2024 18:50:28 -0400
Subject: [PATCH] patch: Apply fix for downloading related model files

---
 convert-hf-to-gguf-update.py | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
index 23ac7ef9f..7ab1f9adb 100755
--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -102,8 +102,7 @@ for model in models:
     repo = model["repo"]
     tokt = model["tokt"]
 
-    # set url paths
-    url_main = f"{repo}/raw/main"
+    # NOTE: We should always be using resolve to download files
     url_resolve = f"{repo}/resolve/main"
 
     # set dir paths
@@ -138,28 +137,18 @@ for model in models:
         )
     else:
         # Get the models tokenizer
         download_file_with_auth(
-            url=f"{url_main}/tokenizer.json",
+            url=f"{url_resolve}/tokenizer.json",
             token=token,
             save_path=model_tokenizer_path
         )
 
         # Get the models hyper params
         download_file_with_auth(
-            url=f"{url_main}/config.json",
+            url=f"{url_resolve}/config.json",
             token=token,
             save_path=f"{model_name_or_path}/config.json"
         )
-    # if downloaded file is less than 1KB, we likely need to download an LFS instead
-    if os.path.getsize(model_tokenizer_path) < 1024:
-        # remove the file
-        os.remove(model_tokenizer_path)
-        download_file_with_auth(
-            url=f"{url_resolve}/tokenizer.json",
-            token=token,
-            save_path=model_tokenizer_path
-        )
-
     # Handle sentencepiece tokenizer
     if tokt == TOKENIZER_TYPE.SPM:
         download_file_with_auth(
@@ -170,7 +159,7 @@ for model in models:
 
     # Get the tokenizer config
     download_file_with_auth(
-        url=f"{url_main}/tokenizer_config.json",
+        url=f"{url_resolve}/tokenizer_config.json",
         token=token,
         save_path=f"{model_name_or_path}/tokenizer_config.json"
     )
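
Notes:

Hugging Face serves two views of a repository file: {repo}/raw/main/<file>
returns the blob as stored in Git, which for LFS-tracked files is only a
small pointer stub, while {repo}/resolve/main/<file> follows the pointer
and returns the actual object. Downloading through resolve therefore works
for both plain and LFS-backed files, which is why the "< 1KB" retry
heuristic removed above is no longer needed. For reference, a minimal
sketch of checking for a pointer stub explicitly (the helper name and the
64-byte read are assumptions for illustration, not code from this repo):

    # Minimal sketch, not part of the patch: detect a Git LFS pointer stub
    # directly instead of inferring one from file size.
    def is_lfs_pointer(path: str) -> bool:
        # Per the LFS spec, a pointer file's first line is
        # "version https://git-lfs.github.com/spec/v1".
        try:
            with open(path, "rb") as f:
                head = f.read(64)
        except OSError:
            return False
        return head.startswith(b"version https://git-lfs.github.com/spec/v1")

Checking the pointer signature rather than the size avoids misclassifying
legitimately small JSON files such as a short tokenizer_config.json.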