convert-*.py: model card metadata

This commit is contained in:
brian khuu 2024-05-31 14:19:53 +10:00
parent a42c2b7efc
commit 916872f72f
2 changed files with 25 additions and 9 deletions

View file

@@ -10,6 +10,7 @@ import json
import os import os
import re import re
import sys import sys
import frontmatter
from enum import IntEnum from enum import IntEnum
from pathlib import Path from pathlib import Path
from hashlib import sha256 from hashlib import sha256
@@ -96,6 +97,7 @@ class Model:
lazy: bool lazy: bool
part_names: list[str] part_names: list[str]
is_safetensors: bool is_safetensors: bool
model_card: dict[str, Any]
hparams: dict[str, Any] hparams: dict[str, Any]
block_count: int block_count: int
tensor_map: gguf.TensorNameMap tensor_map: gguf.TensorNameMap
@@ -123,6 +125,7 @@ class Model:
self.is_safetensors = len(self.part_names) > 0 self.is_safetensors = len(self.part_names) > 0
if not self.is_safetensors: if not self.is_safetensors:
self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
self.model_card = Model.load_model_card(dir_model)
self.hparams = Model.load_hparams(self.dir_model) self.hparams = Model.load_hparams(self.dir_model)
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"]) self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
@@ -148,10 +151,18 @@ class Model:
self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard) split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)
# Update any missing authorship metadata with huggingface_parameters # Update any missing authorship metadata with HuggingFace parameters or model card frontmatter
if self.metadata is not None and self.metadata.source_hf_repo is None: if self.metadata is not None:
if self.hparams is not None and "_name_or_path" in self.hparams:
self.metadata.source_hf_repo = self.hparams["_name_or_path"] # Source Hugging Face Repository
if self.metadata.source_hf_repo is None:
if self.hparams is not None and "_name_or_path" in self.hparams:
self.metadata.source_hf_repo = self.hparams["_name_or_path"]
# Model License
if self.metadata.license is None:
if self.model_card is not None and "license" in self.model_card:
self.metadata.license = self.model_card["license"]
# Set model name based on latest metadata either provided or calculated from environment # Set model name based on latest metadata either provided or calculated from environment
def get_model_name(metadata, huggingface_parameters, dir_model, model_arch): def get_model_name(metadata, huggingface_parameters, dir_model, model_arch):
@@ -499,6 +510,11 @@ class Model:
return part_names return part_names
@staticmethod
def load_model_card(dir_model: Path):
with open(dir_model / "README.md", "r", encoding="utf-8") as f:
return frontmatter.load(f)
@staticmethod @staticmethod
def load_hparams(dir_model: Path): def load_hparams(dir_model: Path):
with open(dir_model / "config.json", "r", encoding="utf-8") as f: with open(dir_model / "config.json", "r", encoding="utf-8") as f:

View file

@@ -29,9 +29,9 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
# Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
if base_name is not None: if base_name is not None:
name = base_name.strip().title().replace(' ', '_') name = base_name.strip().title().replace(' ', '-')
elif model_name is not None: elif model_name is not None:
name = model_name.strip().title().replace(' ', '_') name = model_name.strip().title().replace(' ', '-')
else: else:
name = "ggml-model" name = "ggml-model"
@@ -41,10 +41,10 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
else: else:
parameters = f"-{per_model_rounded_weight_estimate}" parameters = f"-{per_model_rounded_weight_estimate}"
finetune = f"-{finetune_string.strip().title().replace(' ', '_')}" if finetune_string is not None else "" finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else ""
version = f"-{version_string.strip().replace(' ', '_')}" if version_string is not None else "" version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
encoding = f"-{encoding_scheme.strip().replace(' ', '_').upper()}" encoding = f"-{encoding_scheme.strip().replace(' ', '-').upper()}"
return f"{name}{parameters}{finetune}{version}{encoding}" return f"{name}{parameters}{finetune}{version}{encoding}"