From 0f1d50fab7c17fcaf53b8bb2101c07eca5fd1360 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 2 Jun 2024 15:40:31 +1000 Subject: [PATCH] convert-*.py: add parameter size class --- convert_hf_to_gguf.py | 13 ++++--- examples/convert_legacy_llama.py | 2 ++ gguf-py/gguf/constants.py | 1 + gguf-py/gguf/gguf_writer.py | 3 ++ gguf-py/gguf/metadata.py | 34 ++++++++++--------- gguf-py/gguf/utility.py | 11 ++++++ .../requirements-convert_legacy_llama.txt | 1 + 7 files changed, 42 insertions(+), 23 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index fc07226e4..ab2f39e27 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -10,7 +10,6 @@ import json import os import re import sys -import frontmatter from enum import IntEnum from pathlib import Path from hashlib import sha256 @@ -90,11 +89,9 @@ class Model: self.tensor_names = None self.metadata = metadata - model_tensors = self.get_tensors() - if self.ftype == gguf.LlamaFileType.GUESSED: # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie. - _, first_tensor = next(model_tensors) + _, first_tensor = next(self.get_tensors()) if first_tensor.dtype == torch.float16: logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})") self.ftype = gguf.LlamaFileType.MOSTLY_F16 @@ -127,10 +124,10 @@ class Model: # Extracts and converts the encoding scheme from the given file type name. e.g. 
'gguf.LlamaFileType.ALL_F32' --> 'F32' output_type = self.ftype.name.partition("_")[2] - # Get Expert Count From huggingface_parameters + # Update authorship metadata class with parameter size class (useful for leaderboards) expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None - - weight_estimate = gguf.per_model_weight_count_estimation(model_tensors, expert_count) + weight_estimate = gguf.per_model_weight_count_estimation(self.get_tensors(), expert_count) + self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate) # Generate default filename based on model specification and available metadata self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type) @@ -255,6 +252,8 @@ class Model: self.gguf_writer.add_source_url(self.metadata.source_url) if self.metadata.source_hf_repo is not None: self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) + if self.metadata.parameter_size_class is not None: + self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class) if self.metadata.tags is not None: self.gguf_writer.add_tags(self.metadata.tags) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 6e17cb3bd..2bf008a6b 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1363,6 +1363,8 @@ def main(args_in: list[str] | None = None) -> None: model = convert_to_output_type(model, ftype) outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata) + metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count) + params.ftype = ftype logger.info(f"Writing {outfile}, format {ftype}") diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 907d781b0..561e082aa 100644
--- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -37,6 +37,7 @@ class Keys: SOURCE_URL = "general.source.url" SOURCE_HF_REPO = "general.source.huggingface.repository" FILE_TYPE = "general.file_type" + PARAMETER_SIZE_CLASS = "general.parameter_size_class" TAGS = "general.tags" class LLM: diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 619220d44..fdac3455e 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -472,6 +472,9 @@ class GGUFWriter: def add_file_type(self, ftype: int) -> None: self.add_uint32(Keys.General.FILE_TYPE, ftype) + def add_parameter_size_class(self, parameter_size_class: str) -> None: + self.add_string(Keys.General.PARAMETER_SIZE_CLASS, parameter_size_class) + def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.Tokenizer.TAGS, tags) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index b481dd93d..8bed1a3df 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -26,7 +26,8 @@ class Metadata: license_link: Optional[str] = None source_url: Optional[str] = None source_hf_repo: Optional[str] = None - tags: Optional[List[str]] = None + parameter_size_class: Optional[str] = None + tags: Optional[list[str]] = None @staticmethod def load(metadata_override_path: Path, model_path: Path) -> Metadata: @@ -56,7 +57,7 @@ class Metadata: metadata.license_link = model_card.get("license_link") if metadata.author is None: # non huggingface model card standard but notice some model creator using it - metadata.author = model_card.get("model_creator") + metadata.author = model_card.get("model_creator") if metadata.tags is None: metadata.tags = model_card.get("tags", []) @@ -76,20 +77,21 @@ class Metadata: # Metadata Override # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 - 
metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 - metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 - metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 - metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 - metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 - metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 - metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 - metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 - metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 - metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 - metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 - metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO, metadata.source_hf_repo) # noqa: E202 - metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 + metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 + metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 + metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 + metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 + metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 + metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 + metadata.url = metadata_override.get(Keys.General.URL , 
metadata.url ) # noqa: E202 + metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 + metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 + metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 + metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 + metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 + metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 + metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class) # noqa: E202 + metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 return metadata diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 2a52d1273..0ee3499e2 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -65,6 +65,17 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: return f"{round(scaled_model_params)}{scale_suffix}" +def parameter_size_class(expert_count_int:int, model_params_count: int) -> str: + per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) + + if expert_count_int is not None and expert_count_int > 0: + size_class = f"{expert_count_int}x{per_model_rounded_weight_estimate}" + else: + size_class = f"{per_model_rounded_weight_estimate}" + + return size_class + + def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention diff --git a/requirements/requirements-convert_legacy_llama.txt 
b/requirements/requirements-convert_legacy_llama.txt index 1d07b0952..493dbe18c 100644 --- a/requirements/requirements-convert_legacy_llama.txt +++ b/requirements/requirements-convert_legacy_llama.txt @@ -3,3 +3,4 @@ sentencepiece~=0.2.0 transformers>=4.40.1,<5.0.0 gguf>=0.1.0 protobuf>=4.21.0,<5.0.0 +python-frontmatter~=1.0.1