import hashlib
import uuid


def generate_tensors_uuid(self) -> str:
    """Return a deterministic, UUIDv5-shaped identifier derived from tensor data.

    A SHA-1 digest is seeded with the bytes of a fixed namespace UUID and then
    fed the C-ordered raw bytes of every tensor held in ``self.tensors``, in
    insertion order.  The first 16 bytes of the digest are turned into a UUID
    with the version field forced to 5.

    Only tensor *contents* are hashed; tensor names do not contribute to the
    digest, and the result depends on the order in which tensors were added.

    Returns:
        The canonical string form of the generated UUID.

    Raises:
        ValueError: If a tensor entry has no data attached, or if the size of
            its data disagrees with the size recorded in its tensor info.
    """
    digest = hashlib.sha1()
    # Seed the hash with the fixed namespace UUID before any tensor bytes.
    digest.update(uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5').bytes)

    for tensors in self.tensors:
        # relying on the fact that Python dicts preserve insertion order (since 3.7)
        for name, ti in tensors.items():
            # Explicit raises instead of `assert`: asserts are stripped under
            # `python -O`, which would let bad state through unchecked.
            if ti.tensor is None:
                raise ValueError(
                    f"cannot generate tensors uuid: tensor {name!r} has no data"
                )
            if ti.tensor.nbytes != ti.nbytes:
                raise ValueError(
                    f"cannot generate tensors uuid: tensor {name!r} holds "
                    f"{ti.tensor.nbytes} bytes but its info records {ti.nbytes}"
                )
            digest.update(ti.tensor.tobytes('C'))

    # SHA-1 yields 20 bytes; a UUID takes 16, so the digest is truncated.
    return str(uuid.UUID(bytes=digest.digest()[:16], version=5))