gguf_writer.py: generate tensor uuid if missing
This commit is contained in:
parent
4dc8ddd35a
commit
007708e32d
3 changed files with 21 additions and 55 deletions
|
@ -432,6 +432,11 @@ class Model:
|
||||||
|
|
||||||
def write(self):
|
def write(self):
|
||||||
self.write_tensors()
|
self.write_tensors()
|
||||||
|
|
||||||
|
if self.metadata.uuid is None:
|
||||||
|
self.metadata.uuid = self.gguf_writer.generate_tensors_uuid()
|
||||||
|
logger.info("generating general.uuid (based on tensor content) {0}".format(self.metadata.uuid))
|
||||||
|
|
||||||
self.gguf_writer.write_header_to_file(self.fname_out)
|
self.gguf_writer.write_header_to_file(self.fname_out)
|
||||||
self.gguf_writer.write_kv_data_to_file()
|
self.gguf_writer.write_kv_data_to_file()
|
||||||
self.gguf_writer.write_tensors_to_file(progress=True)
|
self.gguf_writer.write_tensors_to_file(progress=True)
|
||||||
|
|
|
@ -2,6 +2,8 @@ from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import uuid
|
||||||
|
import hashlib
|
||||||
import shutil
|
import shutil
|
||||||
import struct
|
import struct
|
||||||
import tempfile
|
import tempfile
|
||||||
|
@ -115,6 +117,7 @@ class GGUFWriter:
|
||||||
if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
|
if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
|
||||||
# allow calling this multiple times as long as the path is the same
|
# allow calling this multiple times as long as the path is the same
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.state is not WriterState.NO_FILE:
|
if self.state is not WriterState.NO_FILE:
|
||||||
raise ValueError(f'Expected output file to be not yet opened, got {self.state}')
|
raise ValueError(f'Expected output file to be not yet opened, got {self.state}')
|
||||||
|
|
||||||
|
@ -366,6 +369,19 @@ class GGUFWriter:
|
||||||
|
|
||||||
self.state = WriterState.WEIGHTS
|
self.state = WriterState.WEIGHTS
|
||||||
|
|
||||||
|
def generate_tensors_uuid(self) -> uuid.UUID:
    """Derive a deterministic UUID from the content of every registered tensor.

    A SHA-1 hash is seeded with the bytes of a fixed UUID and then fed the
    C-order bytes of each tensor, walking ``self.tensors`` in order and each
    mapping in insertion order. The first 16 bytes of the digest become the
    UUID, with the version field forced to 5.

    Returns:
        The content-derived ``uuid.UUID``. Identical tensor data in identical
        order always yields the same value.

    Raises:
        ValueError: if a tensor entry has no data attached, or if its data
            size disagrees with the size recorded for it.
    """
    uuidv5_sha1 = hashlib.sha1()
    # NOTE(review): presumably a project-chosen namespace UUID for tensor
    # hashing; its origin is not visible here -- confirm before changing it,
    # since any change alters every generated UUID.
    uuidv5_sha1.update(uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5').bytes)

    for tensors in self.tensors:
        # relying on the fact that Python dicts preserve insertion order (since 3.7)
        for name, ti in tensors.items():
            # Explicit raises instead of asserts: asserts vanish under -O,
            # which would let a bad entry silently change the hash (or crash
            # with a less helpful AttributeError).
            if ti.tensor is None:
                raise ValueError(f'Tensor {name!r} has no data to hash')
            if ti.tensor.nbytes != ti.nbytes:
                raise ValueError(
                    f'Tensor {name!r} size mismatch: data has {ti.tensor.nbytes} bytes, '
                    f'expected {ti.nbytes}'
                )
            uuidv5_sha1.update(ti.tensor.tobytes('C'))

    # SHA-1 yields 20 bytes; a UUID holds 16. version=5 overwrites the
    # version/variant bits so the result is a well-formed UUIDv5.
    return uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5)
|
||||||
|
|
||||||
def write_tensors_to_file(self, *, progress: bool = False) -> None:
|
def write_tensors_to_file(self, *, progress: bool = False) -> None:
|
||||||
self.write_ti_data_to_file()
|
self.write_ti_data_to_file()
|
||||||
|
|
||||||
|
|
|
@ -95,9 +95,6 @@ class Metadata:
|
||||||
if model_name is not None:
|
if model_name is not None:
|
||||||
metadata.name = model_name
|
metadata.name = model_name
|
||||||
|
|
||||||
# If any UUID is still missing at this point, then we should fill it in
|
|
||||||
metadata = Metadata.generate_any_missing_uuid(metadata)
|
|
||||||
|
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -334,55 +331,3 @@ class Metadata:
|
||||||
metadata.parameter_class_attribute = parameter_class_attribute
|
metadata.parameter_class_attribute = parameter_class_attribute
|
||||||
|
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
@staticmethod
def generate_any_missing_uuid(metadata: Metadata) -> Metadata:
    """Fill in any UUID fields of ``metadata`` that are still unset.

    * ``metadata.uuid``: derived (UUIDv5, URL namespace) from the DOI, else
      the repo URL, else the URL; falls back to a random UUIDv4 so the model
      always ends up with an ID.
    * ``metadata.source_uuid``: derived the same way from the ``source_*``
      fields, but left unset when none of them is available.
    * each ``metadata.base_models`` entry lacking a ``"uuid"`` key: derived
      from its ``"repo_url"``, else ``"url"``, else ``"doi"``; left untouched
      when it has none of them.

    The object is modified in place and also returned.
    """
    url_ns = uuid.NAMESPACE_URL

    # Model UUID: every model must end up with one.
    if metadata.uuid is None:
        if metadata.doi is not None:
            model_id = uuid.uuid5(url_ns, f"https://doi.org/{metadata.doi}")
        elif metadata.repo_url is not None:
            model_id = uuid.uuid5(url_ns, metadata.repo_url)
        elif metadata.url is not None:
            model_id = uuid.uuid5(url_ns, metadata.url)
        else:
            model_id = uuid.uuid4()  # every model must have at least a random UUIDv4
        metadata.uuid = str(model_id)

    # Source UUID: only when some source reference was provided.
    if metadata.source_uuid is None:
        source_ref = None
        if metadata.source_doi is not None:
            source_ref = f"https://doi.org/{metadata.source_doi}"
        elif metadata.source_repo_url is not None:
            source_ref = metadata.source_repo_url
        elif metadata.source_url is not None:
            source_ref = metadata.source_url

        if source_ref is not None:
            metadata.source_uuid = str(uuid.uuid5(url_ns, source_ref))

    # Base model UUIDs: only for entries that carry a usable reference.
    if metadata.base_models is not None:
        for entry in metadata.base_models:
            if "uuid" in entry:
                continue
            # First available key wins; note the DOI is hashed as-is here.
            ref_key = next((k for k in ("repo_url", "url", "doi") if k in entry), None)
            if ref_key is not None:
                entry["uuid"] = str(uuid.uuid5(url_ns, entry[ref_key]))

    return metadata
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue