From de3d9e3b33264bf1e93466d91a8c35c0b4a7e9af Mon Sep 17 00:00:00 2001
From: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
Date: Wed, 8 May 2024 18:50:28 -0400
Subject: [PATCH] patch: Apply fix for downloading related model files

---
 convert-hf-to-gguf-update.py | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
index 23ac7ef9f..7ab1f9adb 100755
--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -102,8 +102,7 @@ for model in models:
     repo = model["repo"]
     tokt = model["tokt"]
 
-    # set url paths
-    url_main = f"{repo}/raw/main"
+    # NOTE: We should always be using resolve to download files
     url_resolve = f"{repo}/resolve/main"
 
     # set dir paths
@@ -138,28 +137,18 @@ for model in models:
         )
     else:
         # Get the models tokenizer
         download_file_with_auth(
-            url=f"{url_main}/tokenizer.json",
+            url=f"{url_resolve}/tokenizer.json",
             token=token,
             save_path=model_tokenizer_path
         )
 
         # Get the models hyper params
         download_file_with_auth(
-            url=f"{url_main}/config.json",
+            url=f"{url_resolve}/config.json",
             token=token,
             save_path=f"{model_name_or_path}/config.json"
         )
-    # if downloaded file is less than 1KB, we likely need to download an LFS instead
-    if os.path.getsize(model_tokenizer_path) < 1024:
-        # remove the file
-        os.remove(model_tokenizer_path)
-        download_file_with_auth(
-            url=f"{url_resolve}/tokenizer.json",
-            token=token,
-            save_path=model_tokenizer_path
-        )
-
     # Handle sentencepiece tokenizer
     if tokt == TOKENIZER_TYPE.SPM:
         download_file_with_auth(
@@ -170,7 +159,7 @@ for model in models:
 
     # Get the tokenizer config
     download_file_with_auth(
-        url=f"{url_main}/tokenizer_config.json",
+        url=f"{url_resolve}/tokenizer_config.json",
         token=token,
         save_path=f"{model_name_or_path}/tokenizer_config.json"
     )
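
Notes:

Hugging Face serves two views of a repository file: {repo}/raw/main/<file>
returns the blob as stored in Git, which for LFS-tracked files is only a
small pointer stub, while {repo}/resolve/main/<file> follows the pointer
and returns the actual object. Downloading through resolve therefore works
for both plain and LFS-backed files, which is why the "< 1KB" retry
heuristic removed above is no longer needed. For reference, a minimal
sketch of checking for a pointer stub explicitly (the helper name and the
64-byte read are assumptions for illustration, not code from this repo):

    # Minimal sketch, not part of the patch: detect a Git LFS pointer stub
    # directly instead of inferring one from file size.
    def is_lfs_pointer(path: str) -> bool:
        # Per the LFS spec, a pointer file's first line is
        # "version https://git-lfs.github.com/spec/v1".
        try:
            with open(path, "rb") as f:
                head = f.read(64)
        except OSError:
            return False
        return head.startswith(b"version https://git-lfs.github.com/spec/v1")

Checking the pointer signature rather than the size avoids misclassifying
legitimately small JSON files such as a short tokenizer_config.json.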