From 916872f72f19912582297dd4d3da41f027504b82 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Fri, 31 May 2024 14:19:53 +1000 Subject: [PATCH] convert-*.py: model card metadata --- convert_hf_to_gguf.py | 24 ++++++++++++++++++++---- gguf-py/gguf/utility.py | 10 +++++----- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index a79898350..9fe81d1a2 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -10,6 +10,7 @@ import json import os import re import sys +import frontmatter from enum import IntEnum from pathlib import Path from hashlib import sha256 @@ -96,6 +97,7 @@ class Model: lazy: bool part_names: list[str] is_safetensors: bool + model_card: dict[str, Any] hparams: dict[str, Any] block_count: int tensor_map: gguf.TensorNameMap @@ -123,6 +125,7 @@ class Model: self.is_safetensors = len(self.part_names) > 0 if not self.is_safetensors: self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") + self.model_card = Model.load_model_card(dir_model) self.hparams = Model.load_hparams(self.dir_model) self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"]) self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) @@ -148,10 +151,18 @@ class Model: self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard) - # Update any missing authorship metadata with huggingface_parameters - if self.metadata is not None and self.metadata.source_hf_repo is None: - if self.hparams is not None and "_name_or_path" in self.hparams: - self.metadata.source_hf_repo = self.hparams["_name_or_path"] + # Update any missing authorship metadata with HuggingFace parameters or model card frontmatter + if self.metadata is not None: + 
+ # Source Hugging Face Repository + if self.metadata.source_hf_repo is None: + if self.hparams is not None and "_name_or_path" in self.hparams: + self.metadata.source_hf_repo = self.hparams["_name_or_path"] + + # Model License (taken from the "license" key of the model card frontmatter) + if self.metadata.license is None: + if self.model_card is not None and "license" in self.model_card: + self.metadata.license = self.model_card["license"] # Set model name based on latest metadata either provided or calculated from environment def get_model_name(metadata, huggingface_parameters, dir_model, model_arch): @@ -499,6 +510,11 @@ class Model: return part_names + @staticmethod + def load_model_card(dir_model: Path): + with open(dir_model / "README.md", "r", encoding="utf-8") as f: + return frontmatter.load(f) + @staticmethod def load_hparams(dir_model: Path): with open(dir_model / "config.json", "r", encoding="utf-8") as f: diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index dd08a36d2..0919a744e 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -29,9 +29,9 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: - name = base_name.strip().title().replace(' ', '_') + name = base_name.strip().title().replace(' ', '-') elif model_name is not None: - name = model_name.strip().title().replace(' ', '_') + name = model_name.strip().title().replace(' ', '-') else: name = "ggml-model" @@ -41,10 +41,10 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers else: parameters = f"-{per_model_rounded_weight_estimate}" - finetune = f"-{finetune_string.strip().title().replace(' ', '_')}" if finetune_string is not None else "" + finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else "" - version = f"-{version_string.strip().replace(' ', '_')}" if version_string is not None else "" + 
version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else "" - encoding = f"-{encoding_scheme.strip().replace(' ', '_').upper()}" + encoding = f"-{encoding_scheme.strip().replace(' ', '-').upper()}" return f"{name}{parameters}{finetune}{version}{encoding}"