convert-*.py: Add source uuid generation
This commit is contained in:
parent
be8306d795
commit
0c491520a8
1 changed files with 22 additions and 2 deletions
|
@ -10,6 +10,8 @@ import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
import uuid
|
||||||
|
import hashlib
|
||||||
from enum import IntEnum
|
from enum import IntEnum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
|
@ -255,6 +257,19 @@ class Model:
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def generate_source_tensors_uuid(self) -> str:
    """Derive a version-5 style UUID string from the source tensor contents.

    A SHA-1 digest is seeded with the bytes of a fixed namespace UUID and is
    then fed every tensor produced by ``self.get_tensors()`` in iteration
    order. Each tensor is converted to float64, squeezed, and serialized in
    C order before being hashed. Tensors whose names end with one of the
    skipped suffixes do not contribute to the digest.

    Returns:
        The string form of a UUID built from the first 16 bytes of the
        SHA-1 digest, with the version field set to 5.
    """
    # we don't need these
    skipped_suffixes = (".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")

    namespace = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')
    digest = hashlib.sha1()
    digest.update(namespace.bytes)

    for tensor_name, tensor in self.get_tensors():
        if not tensor_name.endswith(skipped_suffixes):
            # Normalize to float64 so the hashed bytes have one fixed element type.
            as_f64: np.ndarray = tensor.to(torch.float64).squeeze().numpy()
            digest.update(as_f64.tobytes('C'))

    # uuid.UUID overrides the variant/version bits when `version` is given.
    return str(uuid.UUID(bytes=digest.digest()[:16], version=5))
|
||||||
|
|
||||||
def prepare_tensors(self):
|
def prepare_tensors(self):
|
||||||
max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
|
max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
|
||||||
|
|
||||||
|
@ -381,11 +396,15 @@ class Model:
|
||||||
# output in the same directory as the model by default
|
# output in the same directory as the model by default
|
||||||
self.fname_out = self.dir_model / f"{fname_default}.gguf"
|
self.fname_out = self.dir_model / f"{fname_default}.gguf"
|
||||||
|
|
||||||
|
# Upon missing source model uuid, generate uuid based on source tensor content
|
||||||
|
if not vocab_only and self.metadata.source_uuid is None:
|
||||||
|
self.metadata.source_uuid = self.generate_source_tensors_uuid()
|
||||||
|
logger.info(f"generating general.source_uuid: {self.metadata.source_uuid}")
|
||||||
|
|
||||||
# Upon missing model uuid, generate uuid based on tensor content
|
# Upon missing model uuid, generate uuid based on tensor content
|
||||||
if not vocab_only and self.metadata.uuid is None:
|
if not vocab_only and self.metadata.uuid is None:
|
||||||
self.metadata.uuid = self.gguf_writer.generate_tensors_uuid()
|
self.metadata.uuid = self.gguf_writer.generate_tensors_uuid()
|
||||||
max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
|
logger.info(f"generating general.uuid: {self.metadata.uuid}")
|
||||||
logger.info(f"{f'%-{max_name_len}s' % f'generating general.uuid'} {self.metadata.uuid}")
|
|
||||||
|
|
||||||
self.set_type()
|
self.set_type()
|
||||||
|
|
||||||
|
@ -3468,6 +3487,7 @@ class LazyTorchTensor(gguf.LazyBase):
|
||||||
_dtype_map: dict[torch.dtype, type] = {
|
_dtype_map: dict[torch.dtype, type] = {
|
||||||
torch.float16: np.float16,
|
torch.float16: np.float16,
|
||||||
torch.float32: np.float32,
|
torch.float32: np.float32,
|
||||||
|
torch.float64: np.float64,
|
||||||
}
|
}
|
||||||
|
|
||||||
# used for safetensors slices
|
# used for safetensors slices
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue