convert-*.py: add quantized_by and enhance heuristics

brian khuu 2024-06-03 23:52:46 +10:00
parent 5a86dfaa1c
commit dd1571211e
6 changed files with 159 additions and 63 deletions

View file

@@ -134,7 +134,7 @@ class Model:
         # Update authorship metadata class with parameter size class (useful for leader boards)
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
         weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count)
-        self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate)
+        self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate)

         # Generate default filename based on model specification and available metadata
         self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)
@@ -244,6 +244,8 @@ class Model:
             self.gguf_writer.add_finetune(self.metadata.finetune)
         if self.metadata.author is not None:
             self.gguf_writer.add_author(self.metadata.author)
+        if self.metadata.quantized_by is not None:
+            self.gguf_writer.add_quantized_by(self.metadata.quantized_by)
         if self.metadata.organization is not None:
             self.gguf_writer.add_organization(self.metadata.organization)
         if self.metadata.version is not None:
@@ -260,8 +262,8 @@ class Model:
             self.gguf_writer.add_source_url(self.metadata.source_url)
         if self.metadata.source_hf_repo is not None:
             self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo)
-        if self.metadata.parameter_size_class is not None:
-            self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class)
+        if self.metadata.parameter_weight_class is not None:
+            self.gguf_writer.add_parameter_weight_class(self.metadata.parameter_weight_class)
         if self.metadata.tags is not None:
             self.gguf_writer.add_tags(self.metadata.tags)
         if self.metadata.languages is not None:

View file

@@ -1377,7 +1377,7 @@ def main(args_in: list[str] | None = None) -> None:
     model   = convert_to_output_type(model, ftype)
     outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.n_experts, model_params_count, metadata=metadata)

-    metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count)
+    metadata.parameter_weight_class = gguf.parameter_weight_class(params.n_experts, model_params_count)

     params.ftype = ftype
     logger.info(f"Writing {outfile}, format {ftype}")

View file

@@ -19,29 +19,30 @@ GGML_QUANT_VERSION = 2  # GGML_QNT_VERSION from ggml.h

 class Keys:
     class General:
-        TYPE                 = "general.type"
-        ARCHITECTURE         = "general.architecture"
-        QUANTIZATION_VERSION = "general.quantization_version"
-        ALIGNMENT            = "general.alignment"
-        NAME                 = "general.name"
-        BASENAME             = "general.basename"
-        FINETUNE             = "general.finetune"
-        AUTHOR               = "general.author"
-        ORGANIZATION         = "general.organization"
-        VERSION              = "general.version"
-        BASE_VERSION         = "general.base_version"
-        URL                  = "general.url"
-        DESCRIPTION          = "general.description"
-        LICENSE              = "general.license"
-        LICENSE_NAME         = "general.license.name"
-        LICENSE_LINK         = "general.license.link"
-        SOURCE_URL           = "general.source.url"
-        SOURCE_HF_REPO       = "general.source.huggingface.repository"
-        FILE_TYPE            = "general.file_type"
-        PARAMETER_SIZE_CLASS = "general.parameter_size_class"
-        TAGS                 = "general.tags"
-        LANGUAGES            = "general.languages"
-        DATASETS             = "general.datasets"
+        TYPE                   = "general.type"
+        ARCHITECTURE           = "general.architecture"
+        QUANTIZATION_VERSION   = "general.quantization_version"
+        ALIGNMENT              = "general.alignment"
+        NAME                   = "general.name"
+        BASENAME               = "general.basename"
+        FINETUNE               = "general.finetune"
+        AUTHOR                 = "general.author"
+        QUANTIZED_BY           = "general.quantized_by"
+        ORGANIZATION           = "general.organization"
+        VERSION                = "general.version"
+        BASE_VERSION           = "general.base_version"
+        URL                    = "general.url"
+        DESCRIPTION            = "general.description"
+        LICENSE                = "general.license"
+        LICENSE_NAME           = "general.license.name"
+        LICENSE_LINK           = "general.license.link"
+        SOURCE_URL             = "general.source.url"
+        SOURCE_HF_REPO         = "general.source.huggingface.repository"
+        FILE_TYPE              = "general.file_type"
+        PARAMETER_WEIGHT_CLASS = "general.parameter_weight_class"
+        TAGS                   = "general.tags"
+        LANGUAGES              = "general.languages"
+        DATASETS               = "general.datasets"

     class LLM:
         VOCAB_SIZE = "{arch}.vocab_size"

View file

@@ -439,6 +439,9 @@ class GGUFWriter:
     def add_author(self, author: str) -> None:
         self.add_string(Keys.General.AUTHOR, author)

+    def add_quantized_by(self, quantized: str) -> None:
+        self.add_string(Keys.General.QUANTIZED_BY, quantized)
+
     def add_organization(self, organization: str) -> None:
         self.add_string(Keys.General.ORGANIZATION, organization)
@@ -475,8 +478,8 @@ class GGUFWriter:
     def add_file_type(self, ftype: int) -> None:
         self.add_uint32(Keys.General.FILE_TYPE, ftype)

-    def add_parameter_size_class(self, parameter_size_class: str) -> None:
-        self.add_string(Keys.General.PARAMETER_SIZE_CLASS, parameter_size_class)
+    def add_parameter_weight_class(self, parameter_weight_class: str) -> None:
+        self.add_string(Keys.General.PARAMETER_WEIGHT_CLASS, parameter_weight_class)

     def add_tags(self, tags: Sequence[str]) -> None:
         self.add_array(Keys.Tokenizer.TAGS, tags)
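For orientation, a minimal sketch of how the new writer calls might be exercised, assuming the gguf-py package from this branch; the output path, architecture string, and values are illustrative, not part of this commit:

    from gguf import GGUFWriter

    # Illustrative file name and arch; add_quantized_by and
    # add_parameter_weight_class are the calls added/renamed above
    writer = GGUFWriter("example.gguf", "llama")
    writer.add_quantized_by("TheBloke")        # -> general.quantized_by
    writer.add_parameter_weight_class("8x7B")  # -> general.parameter_weight_class
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()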

View file

@@ -1,5 +1,6 @@
 from __future__ import annotations

+import re
 import json
 import frontmatter
 from pathlib import Path
@@ -15,6 +16,7 @@ class Metadata:
     basename: Optional[str] = None
     finetune: Optional[str] = None
     author: Optional[str] = None
+    quantized_by: Optional[str] = None
     organization: Optional[str] = None
     version: Optional[str] = None
     base_version: Optional[str] = None
@@ -25,13 +27,13 @@ class Metadata:
     license_link: Optional[str] = None
     source_url: Optional[str] = None
     source_hf_repo: Optional[str] = None
-    parameter_size_class: Optional[str] = None
+    parameter_weight_class: Optional[str] = None
     tags: Optional[list[str]] = None
     languages: Optional[list[str]] = None
     datasets: Optional[list[str]] = None

     @staticmethod
-    def load(metadata_override_path: Optional[Path], model_path: Optional[Path], model_name: Optional[str]) -> Metadata:
+    def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None) -> Metadata:
         # This grabs as many contextual authorship metadata as possible from the model repository
         # making any conversion as required to match the gguf kv store metadata format
         # as well as giving users the ability to override any authorship metadata that may be incorrect
@@ -42,14 +44,54 @@ class Metadata:
         # load huggingface model card if available
         # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
         model_card = Metadata.load_model_card(model_path)
-        if metadata.name is None:
-            if "model-index" in model_card and len(model_card["model-index"]) == 1 and "name" in model_card["model-index"][0]:
-                # We check that there is only one model entry in the model-index
-                # (This is a safe choice in case one repo holds multiple models in the future)
-                metadata.name = model_card["model-index"][0].get("name")
-            elif "model_name" in model_card:
-                # non huggingface model card standard, but some model creators use it
-                metadata.name = model_card.get("model_name")
+        if "model_name" in model_card:
+            # Not part of the huggingface model card standard, but some model creators use it,
+            # such as TheBloke, who encodes 'Mixtral 8X7B Instruct v0.1' into model_name
+            metadata.name = model_card.get("model_name")
+
+        if "base_model" in model_card:
+            # Not part of the huggingface model card standard, but some model creators use it,
+            # such as TheBloke, who encodes 'mistralai/Mixtral-8x7B-Instruct-v0.1' into base_model
+            model_id = model_card.get("base_model")
+            model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id)
+            if metadata.name is None and model_name_normal is not None:
+                metadata.name = model_name_normal
+            if metadata.organization is None and organization_name is not None:
+                metadata.organization = organization_name
+            if metadata.basename is None and base_name is not None:
+                metadata.basename = base_name
+            if metadata.finetune is None and fine_tune is not None:
+                metadata.finetune = fine_tune
+            if metadata.version is None and version_string is not None:
+                metadata.version = version_string
+            if metadata.parameter_weight_class is None and parameter_weight_class is not None:
+                metadata.parameter_weight_class = parameter_weight_class
+
+        if "model-index" in model_card and len(model_card["model-index"]) == 1 and "name" in model_card["model-index"][0]:
+            # This model-index entry carries a model id that can be split into organization and model name
+            # (checking for exactly one entry is a safe choice in case one repo holds multiple models in the future)
+            model_id = model_card["model-index"][0].get("name")
+            model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id)
+            if metadata.name is None and model_name_normal is not None:
+                metadata.name = model_name_normal
+            if metadata.organization is None and organization_name is not None:
+                metadata.organization = organization_name
+            if metadata.basename is None and base_name is not None:
+                metadata.basename = base_name
+            if metadata.finetune is None and fine_tune is not None:
+                metadata.finetune = fine_tune
+            if metadata.version is None and version_string is not None:
+                metadata.version = version_string
+            if metadata.parameter_weight_class is None and parameter_weight_class is not None:
+                metadata.parameter_weight_class = parameter_weight_class
+
+        if metadata.quantized_by is None:
+            # Not part of the huggingface model card standard, but used by TheBloke to credit themselves for quantizing 3rd party models
+            metadata.quantized_by = model_card.get("quantized_by")

         if metadata.license is None:
             metadata.license = model_card.get("license")
         if metadata.license_name is None:
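To make the lookup precedence concrete, here is a small self-contained sketch of the same order of operations against a hypothetical TheBloke-style model card; the dict stands in for the parsed front matter and its values are invented:

    # Hypothetical parsed model card front matter (values invented for illustration)
    model_card = {
        "model_name": "Mixtral 8X7B Instruct v0.1",
        "base_model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "quantized_by": "TheBloke",
    }

    name = organization = quantized_by = None

    # model_name is consulted first and wins the display name outright
    if "model_name" in model_card:
        name = model_card.get("model_name")

    # base_model only fills fields that are still unset
    if "base_model" in model_card:
        org, _, _ = model_card["base_model"].partition("/")
        if organization is None:
            organization = org.strip().replace("-", " ").title()  # 'Mistralai'

    if quantized_by is None:
        quantized_by = model_card.get("quantized_by")  # 'TheBloke'

    print(name, organization, quantized_by)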
@ -82,24 +124,25 @@ class Metadata:
# Metadata Override File Provided # Metadata Override File Provided
# This is based on LLM_KV_NAMES mapping in llama.cpp # This is based on LLM_KV_NAMES mapping in llama.cpp
metadata_override = Metadata.load_metadata_override(metadata_override_path) metadata_override = Metadata.load_metadata_override(metadata_override_path)
metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202
metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202
metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202
metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202
metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY , metadata.quantized_by ) # noqa: E202
metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202
metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202
metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202
metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202
metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202
metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202
metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202
metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202
metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202
metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class) # noqa: E202 metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202
metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 metadata.parameter_weight_class = metadata_override.get(Keys.General.PARAMETER_WEIGHT_CLASS, metadata.parameter_weight_class) # noqa: E202
metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202
metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202
metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202
# Direct Metadata Override (via direct cli argument) # Direct Metadata Override (via direct cli argument)
if model_name is not None: if model_name is not None:
@@ -108,7 +151,7 @@ class Metadata:
         return metadata

     @staticmethod
-    def load_metadata_override(metadata_override_path: Optional[Path]):
+    def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, object]:
         if metadata_override_path is None or not metadata_override_path.exists():
             return {}
@@ -116,7 +159,7 @@ class Metadata:
             return json.load(f)

     @staticmethod
-    def load_model_card(model_path: Optional[Path]):
+    def load_model_card(model_path: Optional[Path] = None) -> dict[str, object]:
         if model_path is None or not model_path.exists():
             return {}
@@ -129,7 +172,7 @@ class Metadata:
             return frontmatter.load(f)

     @staticmethod
-    def load_huggingface_parameters(model_path: Optional[Path]):
+    def load_huggingface_parameters(model_path: Optional[Path] = None) -> dict[str, object]:
         if model_path is None or not model_path.exists():
             return {}
@@ -140,3 +183,50 @@ class Metadata:
         with open(config_path, "r", encoding="utf-8") as f:
             return json.load(f)
+
+    @staticmethod
+    def get_model_name_components(model_identifier: Optional[str] = None) -> tuple[Optional[str], Optional[str], Optional[str], Optional[str], Optional[str], Optional[str]]:
+        # Huggingface often stores model ids as '<organization>/<model name>'
+        if model_identifier is None:
+            # model ID missing
+            return None, None, None, None, None, None
+
+        if ' ' in model_identifier:
+            # model ID is actually a normal human sentence,
+            # which most likely means it is only a model name
+            # (not part of the huggingface naming standard, but it happens)
+            return model_identifier, None, None, None, None, None
+
+        if '/' in model_identifier:
+            # model ID (huggingface style)
+            organization, model = model_identifier.split('/', 1)
+        else:
+            # model ID is missing the organization component
+            model = model_identifier
+            organization = None
+
+        # Apply formatting to organization and model name
+        # 'stable-diffusion-xl-base-1.0' --> 'Stable Diffusion Xl Base 1.0'
+        organization_name = organization.strip().replace('-', ' ').title() if organization is not None else None
+        model_name_normal = model.strip().replace('-', ' ').title() if model is not None else None
+
+        # Regular expression to extract model name components
+        # Heuristic to match against cases such as 'Mixtral-8x7B-Instruct-v0.1' or 'Codestral-22B-v0.1'
+        regex_match = re.compile(r'^(?P<base_name>[A-Za-z0-9\s]*(?:(?:-[A-Za-z\s][A-Za-z0-9\s]*)*))'
+                                 r'(?:-(?P<parameter_weight_class>(?:\d+x)?\d+[A-Za-z]+))?'
+                                 r'(?:-(?P<fine_tune>[A-Za-z0-9\s-]+))?'
+                                 r'(?:-(?P<version_string>v\d+(?:\.\d+)*))?$').match(model)
+
+        if not regex_match:
+            return model_name_normal, organization_name, None, None, None, None
+
+        components = regex_match.groupdict()
+        base_name = components.get("base_name")
+        fine_tune = components.get("fine_tune")
+        version_string = components.get("version_string")
+        parameter_weight_class = components.get("parameter_weight_class")
+
+        return model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class
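Tracing the regex by hand against the id cited in the comments gives the following decomposition; a sketch only, with the tuple order matching the return statement above:

    components = Metadata.get_model_name_components("mistralai/Mixtral-8x7B-Instruct-v0.1")
    # ('Mixtral 8X7B Instruct V0.1',  # model_name_normal (title-cased)
    #  'Mistralai',                   # organization_name
    #  'Mixtral',                     # base_name
    #  'Instruct',                    # fine_tune
    #  'v0.1',                        # version_string
    #  '8x7B')                        # parameter_weight_class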

View file

@@ -34,7 +34,7 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str:
     return f"{round(scaled_model_params)}{scale_suffix}"


-def parameter_size_class(expert_count_int:int, model_params_count: int) -> str:
+def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str:
     per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count)

     if expert_count_int is not None and expert_count_int > 0:
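The rest of the function body falls outside this hunk; assuming the elided code keeps the old behaviour of prefixing the expert count for mixture-of-experts models, the expected outputs would be:

    # Hedged expectations, assuming the elided body formats "<experts>x<rounded size>"
    # for MoE models and the bare rounded size otherwise:
    parameter_weight_class(None, 22_200_000_000)  # -> '22B'  (e.g. Codestral-22B)
    parameter_weight_class(8, 7_240_000_000)      # -> '8x7B' (e.g. Mixtral-8x7B)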
@@ -49,9 +49,9 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
     if base_name is not None:
-        name = base_name.strip().title().replace(' ', '-')
+        name = base_name.strip().title().replace(' ', '-').replace('/', '-')
     elif model_name is not None:
-        name = model_name.strip().title().replace(' ', '-')
+        name = model_name.strip().title().replace(' ', '-').replace('/', '-')
     else:
         name = "ggml-model"