convert-*.py: autogen uuid

This commit is contained in:
brian khuu 2024-07-18 21:02:10 +10:00
parent 0d2c7321e9
commit be8306d795
2 changed files with 21 additions and 0 deletions

View file

@ -381,6 +381,12 @@ class Model:
# output in the same directory as the model by default
self.fname_out = self.dir_model / f"{fname_default}.gguf"
# Upon missing model uuid, generate uuid based on tensor content
if not vocab_only and self.metadata.uuid is None:
self.metadata.uuid = self.gguf_writer.generate_tensors_uuid()
max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
logger.info(f"{f'%-{max_name_len}s' % f'generating general.uuid'} {self.metadata.uuid}")
self.set_type()
logger.info("Set meta model")

View file

@ -2,6 +2,8 @@ from __future__ import annotations
import logging
import os
import uuid
import hashlib
import shutil
import struct
import tempfile
@ -417,6 +419,19 @@ class GGUFWriter:
self.state = WriterState.WEIGHTS
def generate_tensors_uuid(self) -> str:
    """Build a UUID string from the raw bytes of every tensor held by the writer.

    A SHA-1 digest is seeded with the bytes of a fixed namespace UUID and
    then fed each tensor's data (serialized in C order), walking
    ``self.tensors`` in order. Only tensor data is hashed; tensor names do
    not contribute. The first 16 digest bytes are turned into a UUID with
    the version field set to 5.

    Returns:
        The canonical string form of the resulting UUID.

    Raises:
        AssertionError: if a tensor entry has no data attached, or its
            data size disagrees with the size recorded on the entry.
    """
    namespace = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')
    digest = hashlib.sha1(namespace.bytes)

    # The result depends on iteration order: dicts preserve insertion
    # order (Python 3.7+), so the same tensors added in the same order
    # always yield the same UUID.
    for tensor_group in self.tensors:
        for info in tensor_group.values():
            data = info.tensor
            assert data is not None
            assert data.nbytes == info.nbytes
            digest.update(data.tobytes('C'))

    # A UUID holds 16 bytes; SHA-1 yields 20, so the digest is truncated.
    return str(uuid.UUID(bytes=digest.digest()[:16], version=5))
def write_tensors_to_file(self, *, progress: bool = False) -> None:
self.write_ti_data_to_file()