convert-*.py: model card metadata
commit 916872f72f
parent a42c2b7efc

2 changed files with 25 additions and 9 deletions
@@ -10,6 +10,7 @@ import json
 import os
 import re
 import sys
+import frontmatter
 from enum import IntEnum
 from pathlib import Path
 from hashlib import sha256
@@ -96,6 +97,7 @@ class Model:
     lazy: bool
     part_names: list[str]
     is_safetensors: bool
+    model_card: dict[str, Any]
     hparams: dict[str, Any]
     block_count: int
     tensor_map: gguf.TensorNameMap
@@ -123,6 +125,7 @@ class Model:
         self.is_safetensors = len(self.part_names) > 0
         if not self.is_safetensors:
             self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
+        self.model_card = Model.load_model_card(dir_model)
         self.hparams = Model.load_hparams(self.dir_model)
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
@@ -148,10 +151,18 @@ class Model:
         self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
                                            split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)

-        # Update any missing authorship metadata with huggingface_parameters
-        if self.metadata is not None and self.metadata.source_hf_repo is None:
-            if self.hparams is not None and "_name_or_path" in self.hparams:
-                self.metadata.source_hf_repo = self.hparams["_name_or_path"]
+        # Update any missing authorship metadata with HuggingFace parameters or model card frontmatter
+        if self.metadata is not None:
+
+            # Source Hugging Face Repository
+            if self.metadata.source_hf_repo is None:
+                if self.hparams is not None and "_name_or_path" in self.hparams:
+                    self.metadata.source_hf_repo = self.hparams["_name_or_path"]
+
+            # Model License
+            if self.metadata.license is None:
+                if self.model_card is not None and "license" in self.model_card:
+                    self.metadata.license = self.model_card["license"]

         # Set model name based on latest metadata either provided or calculated from environment
         def get_model_name(metadata, huggingface_parameters, dir_model, model_arch):
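As a reading aid, here is a minimal self-contained sketch of the fallback order introduced above. The Metadata dataclass and apply_metadata_fallbacks helper are illustrative stand-ins rather than the converter's real types; only the "_name_or_path" and "license" lookups mirror the diff.

from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class Metadata:
    # Illustrative stand-in for the converter's metadata object
    source_hf_repo: Optional[str] = None
    license: Optional[str] = None


def apply_metadata_fallbacks(metadata: Metadata, hparams: dict[str, Any], model_card: dict[str, Any]) -> Metadata:
    # config.json from Hugging Face often records the upstream repo in "_name_or_path"
    if metadata.source_hf_repo is None and hparams and "_name_or_path" in hparams:
        metadata.source_hf_repo = hparams["_name_or_path"]
    # the model card frontmatter usually carries a "license" key
    if metadata.license is None and model_card and "license" in model_card:
        metadata.license = model_card["license"]
    return metadata


# Example: both fields are missing, so both fallbacks fire
meta = apply_metadata_fallbacks(Metadata(),
                                {"_name_or_path": "mistralai/Mistral-7B-v0.1"},
                                {"license": "apache-2.0"})
print(meta)  # Metadata(source_hf_repo='mistralai/Mistral-7B-v0.1', license='apache-2.0')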
@@ -499,6 +510,11 @@ class Model:

         return part_names

+    @staticmethod
+    def load_model_card(dir_model: Path):
+        with open(dir_model / "README.md", "r", encoding="utf-8") as f:
+            return frontmatter.load(f)
+
     @staticmethod
     def load_hparams(dir_model: Path):
         with open(dir_model / "config.json", "r", encoding="utf-8") as f:
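For context, the frontmatter module imported at the top of the file comes from the python-frontmatter package on PyPI (the install name differs from the import name). Below is a rough sketch of what load_model_card ends up returning, using an in-memory README with made-up fields instead of a real model directory.

import io

import frontmatter

# Made-up model card; only the YAML block between the --- markers is frontmatter
readme = """---
license: apache-2.0
language:
  - en
---
# Example model

Free-form markdown body of the model card.
"""

post = frontmatter.load(io.StringIO(readme))  # load() accepts paths or file-like objects
print(post.get("license"))   # 'apache-2.0'
print(post.metadata)         # {'license': 'apache-2.0', 'language': ['en']}
print(post.content)          # the markdown body after the frontmatter block

Note that the return value is a frontmatter.Post rather than a plain dict; the parsed keys live in post.metadata and are reachable via .get() and dict-style indexing.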
@@ -29,9 +29,9 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention

     if base_name is not None:
-        name = base_name.strip().title().replace(' ', '_')
+        name = base_name.strip().title().replace(' ', '-')
     elif model_name is not None:
-        name = model_name.strip().title().replace(' ', '_')
+        name = model_name.strip().title().replace(' ', '-')
     else:
         name = "ggml-model"

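A throwaway illustration of what the one-character change in the two replace() calls above does to a name containing spaces (the value is made up):

base_name = "mixtral moe"
print(base_name.strip().title().replace(' ', '_'))  # before: 'Mixtral_Moe'
print(base_name.strip().title().replace(' ', '-'))  # after:  'Mixtral-Moe'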
@@ -41,10 +41,10 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
     else:
         parameters = f"-{per_model_rounded_weight_estimate}"

-    finetune = f"-{finetune_string.strip().title().replace(' ', '_')}" if finetune_string is not None else ""
+    finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else ""

-    version = f"-{version_string.strip().replace(' ', '_')}" if version_string is not None else ""
+    version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""

-    encoding = f"-{encoding_scheme.strip().replace(' ', '_').upper()}"
+    encoding = f"-{encoding_scheme.strip().replace(' ', '-').upper()}"

     return f"{name}{parameters}{finetune}{version}{encoding}"
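Putting the two hunks together, here is a rough standalone sketch of the resulting filename stem. The helper below is a simplified stand-in with an assumed argument order; the real naming_convention takes more parameters and derives the size label itself.

def sketch_naming_convention(base_name, size_label, finetune_string, version_string, encoding_scheme):
    # Simplified stand-in for the converter's naming helper, hyphen-separated per
    # the GGUF naming convention referenced above
    name = base_name.strip().title().replace(' ', '-') if base_name is not None else "ggml-model"
    parameters = f"-{size_label}"
    finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else ""
    version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
    encoding = f"-{encoding_scheme.strip().replace(' ', '-').upper()}"
    return f"{name}{parameters}{finetune}{version}{encoding}"


print(sketch_naming_convention("mixtral", "8x7B", "instruct", "v0.1", "Q4_0"))
# -> 'Mixtral-8x7B-Instruct-v0.1-Q4_0'

For single-word fields the output is the same as before; the difference only shows up when a field contains spaces, e.g. a finetune string of "code instruct" now yields "Code-Instruct" instead of "Code_Instruct".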