convert-*.py: add tensor hash general.hash.sha256 to kv store

This commit is contained in:
brian khuu 2024-07-23 21:51:01 +10:00
parent 751fcfc6c3
commit 60d47894f3
3 changed files with 25 additions and 0 deletions

View file

@@ -384,6 +384,12 @@ class Model:
self.set_type()
# Generate sha256 based on tensor content if required
if not vocab_only:
hash_sha256 = self.gguf_writer.calculate_tensor_hash_sha256()
self.gguf_writer.add_hash_sha256(hash_sha256)
logger.info(f"tensor hash (sha256): {hash_sha256}")
logger.info("Set meta model")
self.metadata.set_gguf_meta_model(self.gguf_writer)

View file

@@ -25,6 +25,9 @@ class Keys:
ALIGNMENT = "general.alignment"
FILE_TYPE = "general.file_type"
# Tensor Hash
HASH_SHA256 = "general.hash.sha256"
# Authorship Metadata
NAME = "general.name"
AUTHOR = "general.author"

View file

@@ -2,6 +2,7 @@ from __future__ import annotations
import logging
import os
import hashlib
import shutil
import struct
import tempfile
@@ -417,6 +418,18 @@ class GGUFWriter:
self.state = WriterState.WEIGHTS
def calculate_tensor_hash_sha256(self) -> str:
    """Return the SHA-256 hex digest of all tensor data held by the writer.

    Every tensor found in ``self.tensors`` is fed into one running hash,
    in iteration order, so the digest depends on both the tensor contents
    and the order in which the tensors were registered.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        AssertionError: If a tensor entry has no data attached, or if the
            size of its data disagrees with the entry's recorded ``nbytes``.
    """
    digest = hashlib.sha256()
    for tensors in self.tensors:
        # Relies on dicts preserving insertion order (Python 3.7+), which
        # keeps the hash reproducible for a given sequence of tensors.
        # Only the entries are needed, so iterate values rather than items.
        for ti in tensors.values():
            assert ti.tensor is not None
            assert ti.tensor.nbytes == ti.nbytes
            # Hash the row-major ('C' order) byte representation.
            digest.update(ti.tensor.tobytes('C'))
    return digest.hexdigest()
def write_tensors_to_file(self, *, progress: bool = False) -> None:
self.write_ti_data_to_file()
@@ -491,6 +504,9 @@ class GGUFWriter:
def add_file_type(self, ftype: int) -> None:
    """Record ``ftype`` as a uint32 under the general file-type key."""
    key = Keys.General.FILE_TYPE
    self.add_uint32(key, ftype)
def add_hash_sha256(self, hash: str) -> None:
    """Record the given hash string under the general SHA-256 hash key."""
    key = Keys.General.HASH_SHA256
    self.add_string(key, hash)
def add_name(self, name: str) -> None:
    """Record ``name`` as a string under the general name key."""
    key = Keys.General.NAME
    self.add_string(key, name)