convert-*.py: model card metadata

This commit is contained in:
brian khuu 2024-05-31 14:19:53 +10:00
parent a42c2b7efc
commit 916872f72f
2 changed files with 25 additions and 9 deletions

View file

@@ -10,6 +10,7 @@ import json
import os import os
import re import re
import sys import sys
import frontmatter
from enum import IntEnum from enum import IntEnum
from pathlib import Path from pathlib import Path
from hashlib import sha256 from hashlib import sha256
@@ -96,6 +97,7 @@ class Model:
lazy: bool lazy: bool
part_names: list[str] part_names: list[str]
is_safetensors: bool is_safetensors: bool
model_card: dict[str, Any]
hparams: dict[str, Any] hparams: dict[str, Any]
block_count: int block_count: int
tensor_map: gguf.TensorNameMap tensor_map: gguf.TensorNameMap
@@ -123,6 +125,7 @@ class Model:
self.is_safetensors = len(self.part_names) > 0 self.is_safetensors = len(self.part_names) > 0
if not self.is_safetensors: if not self.is_safetensors:
self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
self.model_card = Model.load_model_card(dir_model)
self.hparams = Model.load_hparams(self.dir_model) self.hparams = Model.load_hparams(self.dir_model)
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"]) self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
@@ -148,10 +151,18 @@ class Model:
self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard) split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)
# Update any missing authorship metadata with huggingface_parameters # Update any missing authorship metadata with HuggingFace parameters or model card frontmatter
if self.metadata is not None and self.metadata.source_hf_repo is None: if self.metadata is not None:
if self.hparams is not None and "_name_or_path" in self.hparams:
self.metadata.source_hf_repo = self.hparams["_name_or_path"] # Source Hugging Face Repository
if self.metadata.source_hf_repo is None:
if self.hparams is not None and "_name_or_path" in self.hparams:
self.metadata.source_hf_repo = self.hparams["_name_or_path"]
# Model License
if self.metadata.license is None:
if self.model_card is not None and "license" in self.model_card:
self.metadata.license = self.model_card["license"]
# Set model name based on latest metadata either provided or calculated from environment # Set model name based on latest metadata either provided or calculated from environment
def get_model_name(metadata, huggingface_parameters, dir_model, model_arch): def get_model_name(metadata, huggingface_parameters, dir_model, model_arch):
@@ -499,6 +510,11 @@ class Model:
return part_names return part_names
@staticmethod
def load_model_card(dir_model: Path):
with open(dir_model / "README.md", "r", encoding="utf-8") as f:
return frontmatter.load(f)
@staticmethod @staticmethod
def load_hparams(dir_model: Path): def load_hparams(dir_model: Path):
with open(dir_model / "config.json", "r", encoding="utf-8") as f: with open(dir_model / "config.json", "r", encoding="utf-8") as f:

View file

@@ -29,9 +29,9 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
# Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
if base_name is not None: if base_name is not None:
name = base_name.strip().title().replace(' ', '_') name = base_name.strip().title().replace(' ', '-')
elif model_name is not None: elif model_name is not None:
name = model_name.strip().title().replace(' ', '_') name = model_name.strip().title().replace(' ', '-')
else: else:
name = "ggml-model" name = "ggml-model"
@@ -41,10 +41,10 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
else: else:
parameters = f"-{per_model_rounded_weight_estimate}" parameters = f"-{per_model_rounded_weight_estimate}"
finetune = f"-{finetune_string.strip().title().replace(' ', '_')}" if finetune_string is not None else "" finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else ""
version = f"-{version_string.strip().replace(' ', '_')}" if version_string is not None else "" version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
encoding = f"-{encoding_scheme.strip().replace(' ', '_').upper()}" encoding = f"-{encoding_scheme.strip().replace(' ', '-').upper()}"
return f"{name}{parameters}{finetune}{version}{encoding}" return f"{name}{parameters}{finetune}{version}{encoding}"