From b2ca23c746103a0f8219365d0a58e1f621cb14fd Mon Sep 17 00:00:00 2001
From: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
Date: Sat, 18 May 2024 01:46:13 -0400
Subject: [PATCH] feat: Add method for generating the checksums and writing the results to a json file

---
Review notes (this section is ignored by git am):
 * generate_checksums() hashes str(tokenizer.vocab) with sha256 for every
   entry in self.models and dumps the collected mappings to
   <local_path.parent>/checksums.json.
 * open() for checksums.json now passes mode "w". With the default read-only
   mode the call either failed on a missing file or handed json.dump() a
   handle that cannot be written to.
 * `trust_remote=True` is kept exactly as submitted. The keyword documented
   for AutoTokenizer.from_pretrained appears to be `trust_remote_code`;
   confirm intent before changing it, since that switch allows repository
   code to run.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch; verify the context lines against blob 86b8312c0
   before applying.

 gguf-py/gguf/huggingface_hub.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/gguf-py/gguf/huggingface_hub.py b/gguf-py/gguf/huggingface_hub.py
index 86b8312c0..77fc69d29 100644
--- a/gguf-py/gguf/huggingface_hub.py
+++ b/gguf-py/gguf/huggingface_hub.py
@@ -176,4 +176,15 @@ class HFTokenizerRequest:
                 self.resolve_tokenizer_model(filename, filepath, model)
 
     def generate_checksums(self) -> None:
-        pass
+        checksums = []
+        for model in self.models:
+            mapping = {}
+            filepath = f"{self.local_path}/{model['repo']}"
+            tokenizer = AutoTokenizer.from_pretrained(filepath, trust_remote=True)
+            mapping.update(model)
+            mapping['checksum'] = sha256(str(tokenizer.vocab).encode()).hexdigest()
+            self.logger.info(f"Hashed {model['repo']} as {mapping['checksum']}")
+            checksums.append(mapping)
+
+        with open(f"{self.local_path.parent}/checksums.json", "w") as file:
+            json.dump(checksums, file)