From 0f1d50fab7c17fcaf53b8bb2101c07eca5fd1360 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 2 Jun 2024 15:40:31 +1000 Subject: [PATCH] convert-*.py: add parameter size class --- convert_hf_to_gguf.py | 13 ++++--- examples/convert_legacy_llama.py | 2 ++ gguf-py/gguf/constants.py | 1 + gguf-py/gguf/gguf_writer.py | 3 ++ gguf-py/gguf/metadata.py | 34 ++++++++++--------- gguf-py/gguf/utility.py | 11 ++++++ .../requirements-convert_legacy_llama.txt | 1 + 7 files changed, 42 insertions(+), 23 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index fc07226e4..ab2f39e27 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -10,7 +10,6 @@ import json import os import re import sys -import frontmatter from enum import IntEnum from pathlib import Path from hashlib import sha256 @@ -90,11 +89,9 @@ class Model: self.tensor_names = None self.metadata = metadata - model_tensors = self.get_tensors() - if self.ftype == gguf.LlamaFileType.GUESSED: # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie. - _, first_tensor = next(model_tensors) + _, first_tensor = next(self.get_tensors()) if first_tensor.dtype == torch.float16: logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})") self.ftype = gguf.LlamaFileType.MOSTLY_F16 @@ -127,10 +124,10 @@ class Model: # Extracts and converts the encoding scheme from the given file type name. e.g. 
'gguf.LlamaFileType.ALL_F32' --> 'F32' output_type = self.ftype.name.partition("_")[2] - # Get Expert Count From huggingface_parameters + # Update authorship metadata class with parameter size class (useful for leaderboards) expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None - - weight_estimate = gguf.per_model_weight_count_estimation(model_tensors, expert_count) + weight_estimate = gguf.per_model_weight_count_estimation(self.get_tensors(), expert_count) + self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate) # Generate default filename based on model specification and available metadata self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type) @@ -255,6 +252,8 @@ class Model: self.gguf_writer.add_source_url(self.metadata.source_url) if self.metadata.source_hf_repo is not None: self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) + if self.metadata.parameter_size_class is not None: + self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class) if self.metadata.tags is not None: self.gguf_writer.add_tags(self.metadata.tags) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 6e17cb3bd..2bf008a6b 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1363,6 +1363,8 @@ def main(args_in: list[str] | None = None) -> None: model = convert_to_output_type(model, ftype) outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata) + metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count) + params.ftype = ftype logger.info(f"Writing {outfile}, format {ftype}") diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 907d781b0..561e082aa 100644
--- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -37,6 +37,7 @@ class Keys: SOURCE_URL = "general.source.url" SOURCE_HF_REPO = "general.source.huggingface.repository" FILE_TYPE = "general.file_type" + PARAMETER_SIZE_CLASS = "general.parameter_size_class" TAGS = "general.tags" class LLM: diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 619220d44..fdac3455e 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -472,6 +472,9 @@ class GGUFWriter: def add_file_type(self, ftype: int) -> None: self.add_uint32(Keys.General.FILE_TYPE, ftype) + def add_parameter_size_class(self, parameter_size_class: str) -> None: + self.add_string(Keys.General.PARAMETER_SIZE_CLASS, parameter_size_class) + def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.Tokenizer.TAGS, tags) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index b481dd93d..8bed1a3df 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -26,7 +26,8 @@ class Metadata: license_link: Optional[str] = None source_url: Optional[str] = None source_hf_repo: Optional[str] = None - tags: Optional[List[str]] = None + parameter_size_class: Optional[str] = None + tags: Optional[list[str]] = None @staticmethod def load(metadata_override_path: Path, model_path: Path) -> Metadata: @@ -56,7 +57,7 @@ class Metadata: metadata.license_link = model_card.get("license_link") if metadata.author is None: # non huggingface model card standard but notice some model creator using it - metadata.author = model_card.get("model_creator") + metadata.author = model_card.get("model_creator") if metadata.tags is None: metadata.tags = model_card.get("tags", []) @@ -76,20 +77,21 @@ class Metadata: # Metadata Override # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 - 
metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 - metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 - metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 - metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 - metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 - metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 - metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 - metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 - metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 - metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 - metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 - metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO, metadata.source_hf_repo) # noqa: E202 - metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 + metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 + metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 + metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 + metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 + metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 + metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 + metadata.url = metadata_override.get(Keys.General.URL , 
metadata.url ) # noqa: E202 + metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 + metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 + metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 + metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 + metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 + metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 + metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class) # noqa: E202 + metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 return metadata diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 2a52d1273..0ee3499e2 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -65,6 +65,17 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: return f"{round(scaled_model_params)}{scale_suffix}" +def parameter_size_class(expert_count_int:int, model_params_count: int) -> str: + per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) + + if expert_count_int is not None and expert_count_int > 0: + size_class = f"{expert_count_int}x{per_model_rounded_weight_estimate}" + else: + size_class = f"{per_model_rounded_weight_estimate}" + + return size_class + + def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention diff --git a/requirements/requirements-convert_legacy_llama.txt 
b/requirements/requirements-convert_legacy_llama.txt index 1d07b0952..493dbe18c 100644 --- a/requirements/requirements-convert_legacy_llama.txt +++ b/requirements/requirements-convert_legacy_llama.txt @@ -3,3 +3,4 @@ sentencepiece~=0.2.0 transformers>=4.40.1,<5.0.0 gguf>=0.1.0 protobuf>=4.21.0,<5.0.0 +python-frontmatter~=1.0.1