convert-*.py: add tensor hash general.hash.sha256 to kv store
This commit is contained in:
parent
751fcfc6c3
commit
60d47894f3
3 changed files with 25 additions and 0 deletions
|
@ -384,6 +384,12 @@ class Model:
|
||||||
|
|
||||||
self.set_type()
|
self.set_type()
|
||||||
|
|
||||||
|
# Generate sha256 based on tensor content if required
|
||||||
|
if not vocab_only:
|
||||||
|
hash_sha256 = self.gguf_writer.calculate_tensor_hash_sha256()
|
||||||
|
self.gguf_writer.add_hash_sha256(hash_sha256)
|
||||||
|
logger.info(f"tensor hash (sha256): {hash_sha256}")
|
||||||
|
|
||||||
logger.info("Set meta model")
|
logger.info("Set meta model")
|
||||||
self.metadata.set_gguf_meta_model(self.gguf_writer)
|
self.metadata.set_gguf_meta_model(self.gguf_writer)
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,9 @@ class Keys:
|
||||||
ALIGNMENT = "general.alignment"
|
ALIGNMENT = "general.alignment"
|
||||||
FILE_TYPE = "general.file_type"
|
FILE_TYPE = "general.file_type"
|
||||||
|
|
||||||
|
# Tensor Hash
|
||||||
|
HASH_SHA256 = "general.hash.sha256"
|
||||||
|
|
||||||
# Authorship Metadata
|
# Authorship Metadata
|
||||||
NAME = "general.name"
|
NAME = "general.name"
|
||||||
AUTHOR = "general.author"
|
AUTHOR = "general.author"
|
||||||
|
|
|
@ -2,6 +2,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import hashlib
|
||||||
import shutil
|
import shutil
|
||||||
import struct
|
import struct
|
||||||
import tempfile
|
import tempfile
|
||||||
|
@ -417,6 +418,18 @@ class GGUFWriter:
|
||||||
|
|
||||||
self.state = WriterState.WEIGHTS
|
self.state = WriterState.WEIGHTS
|
||||||
|
|
||||||
|
def calculate_tensor_hash_sha256(self) -> str:
    """Return the SHA-256 hex digest of every tensor currently held by the writer.

    Tensors are hashed in storage order: each dict in ``self.tensors`` is
    visited in sequence, and the entries of each dict in insertion order.
    Every tensor contributes its bytes serialized in C order.

    Returns:
        The hexadecimal digest string of the combined tensor bytes.
    """
    digest = hashlib.sha256()

    # Dicts preserve insertion order (guaranteed since Python 3.7), so
    # walking the values gives a deterministic tensor sequence.
    for tensor_map in self.tensors:
        for info in tensor_map.values():
            data = info.tensor
            assert data is not None
            # The recorded size must agree with the actual buffer size.
            assert data.nbytes == info.nbytes
            digest.update(data.tobytes('C'))

    return digest.hexdigest()
|
||||||
|
|
||||||
def write_tensors_to_file(self, *, progress: bool = False) -> None:
|
def write_tensors_to_file(self, *, progress: bool = False) -> None:
|
||||||
self.write_ti_data_to_file()
|
self.write_ti_data_to_file()
|
||||||
|
|
||||||
|
@ -491,6 +504,9 @@ class GGUFWriter:
|
||||||
def add_file_type(self, ftype: int) -> None:
|
def add_file_type(self, ftype: int) -> None:
|
||||||
self.add_uint32(Keys.General.FILE_TYPE, ftype)
|
self.add_uint32(Keys.General.FILE_TYPE, ftype)
|
||||||
|
|
||||||
|
def add_hash_sha256(self, hash: str) -> None:
    """Record a SHA-256 digest string in the key-value store.

    The value is written as a string under ``Keys.General.HASH_SHA256``.
    The parameter name shadows the ``hash`` builtin; it is kept because
    callers may pass it by keyword.
    """
    key = Keys.General.HASH_SHA256
    self.add_string(key, hash)
|
||||||
|
|
||||||
def add_name(self, name: str) -> None:
|
def add_name(self, name: str) -> None:
|
||||||
self.add_string(Keys.General.NAME, name)
|
self.add_string(Keys.General.NAME, name)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue