diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 429b83dbe..43936352b 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -134,7 +134,7 @@ class Model: # Update authorship metadata class with parameter size class (useful for leader boards) expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count) - self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate) + self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate) # Generate default filename based on model specification and available metadata self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type) @@ -244,6 +244,8 @@ class Model: self.gguf_writer.add_finetune(self.metadata.finetune) if self.metadata.author is not None: self.gguf_writer.add_author(self.metadata.author) + if self.metadata.quantized_by is not None: + self.gguf_writer.add_quantized_by(self.metadata.quantized_by) if self.metadata.organization is not None: self.gguf_writer.add_organization(self.metadata.organization) if self.metadata.version is not None: @@ -260,8 +262,8 @@ class Model: self.gguf_writer.add_source_url(self.metadata.source_url) if self.metadata.source_hf_repo is not None: self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) - if self.metadata.parameter_size_class is not None: - self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class) + if self.metadata.parameter_weight_class is not None: + self.gguf_writer.add_parameter_weight_class(self.metadata.parameter_weight_class) if self.metadata.tags is not None: self.gguf_writer.add_tags(self.metadata.tags) if self.metadata.languages is not None: diff --git a/examples/convert_legacy_llama.py 
b/examples/convert_legacy_llama.py index 286fa3637..f3839861c 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1377,7 +1377,7 @@ def main(args_in: list[str] | None = None) -> None: model = convert_to_output_type(model, ftype) outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.n_experts, model_params_count, metadata=metadata) - metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count) + metadata.parameter_weight_class = gguf.parameter_weight_class(params.n_experts, model_params_count) params.ftype = ftype logger.info(f"Writing {outfile}, format {ftype}") diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index a965b37a3..21d9e84b3 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -19,29 +19,30 @@ GGML_QUANT_VERSION = 2 # GGML_QNT_VERSION from ggml.h class Keys: class General: - TYPE = "general.type" - ARCHITECTURE = "general.architecture" - QUANTIZATION_VERSION = "general.quantization_version" - ALIGNMENT = "general.alignment" - NAME = "general.name" - BASENAME = "general.basename" - FINETUNE = "general.finetune" - AUTHOR = "general.author" - ORGANIZATION = "general.organization" - VERSION = "general.version" - BASE_VERSION = "general.base_version" - URL = "general.url" - DESCRIPTION = "general.description" - LICENSE = "general.license" - LICENSE_NAME = "general.license.name" - LICENSE_LINK = "general.license.link" - SOURCE_URL = "general.source.url" - SOURCE_HF_REPO = "general.source.huggingface.repository" - FILE_TYPE = "general.file_type" - PARAMETER_SIZE_CLASS = "general.parameter_size_class" - TAGS = "general.tags" - LANGUAGES = "general.languages" - DATASETS = "general.datasets" + TYPE = "general.type" + ARCHITECTURE = "general.architecture" + QUANTIZATION_VERSION = "general.quantization_version" + ALIGNMENT = "general.alignment" + NAME = "general.name" + BASENAME = "general.basename" + FINETUNE = "general.finetune" + 
AUTHOR = "general.author" + QUANTIZED_BY = "general.quantized_by" + ORGANIZATION = "general.organization" + VERSION = "general.version" + BASE_VERSION = "general.base_version" + URL = "general.url" + DESCRIPTION = "general.description" + LICENSE = "general.license" + LICENSE_NAME = "general.license.name" + LICENSE_LINK = "general.license.link" + SOURCE_URL = "general.source.url" + SOURCE_HF_REPO = "general.source.huggingface.repository" + FILE_TYPE = "general.file_type" + PARAMETER_WEIGHT_CLASS = "general.parameter_weight_class" + TAGS = "general.tags" + LANGUAGES = "general.languages" + DATASETS = "general.datasets" class LLM: VOCAB_SIZE = "{arch}.vocab_size" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 8f87e446e..b17112beb 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -439,6 +439,9 @@ class GGUFWriter: def add_author(self, author: str) -> None: self.add_string(Keys.General.AUTHOR, author) + def add_quantized_by(self, quantized: str) -> None: + self.add_string(Keys.General.QUANTIZED_BY, quantized) + def add_organization(self, organization: str) -> None: self.add_string(Keys.General.ORGANIZATION, organization) @@ -475,8 +478,8 @@ class GGUFWriter: def add_file_type(self, ftype: int) -> None: self.add_uint32(Keys.General.FILE_TYPE, ftype) - def add_parameter_size_class(self, parameter_size_class: str) -> None: - self.add_string(Keys.General.PARAMETER_SIZE_CLASS, parameter_size_class) + def add_parameter_weight_class(self, parameter_weight_class: str) -> None: + self.add_string(Keys.General.PARAMETER_WEIGHT_CLASS, parameter_weight_class) def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.Tokenizer.TAGS, tags) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 475c99f58..aa8a23830 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re import json import frontmatter from pathlib import Path @@ 
-15,6 +16,7 @@ class Metadata: basename: Optional[str] = None finetune: Optional[str] = None author: Optional[str] = None + quantized_by: Optional[str] = None organization: Optional[str] = None version: Optional[str] = None base_version: Optional[str] = None @@ -25,13 +27,13 @@ class Metadata: license_link: Optional[str] = None source_url: Optional[str] = None source_hf_repo: Optional[str] = None - parameter_size_class: Optional[str] = None + parameter_weight_class: Optional[str] = None tags: Optional[list[str]] = None languages: Optional[list[str]] = None datasets: Optional[list[str]] = None @staticmethod - def load(metadata_override_path: Optional[Path], model_path: Optional[Path], model_name: Optional[str]) -> Metadata: + def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None) -> Metadata: # This grabs as many contextual authorship metadata as possible from the model repository # making any conversion as required to match the gguf kv store metadata format # as well as giving users the ability to override any authorship metadata that may be incorrect @@ -42,14 +44,54 @@ class Metadata: # load huggingface model card if available # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 model_card = Metadata.load_model_card(model_path) - if metadata.name is None: - if "model-index" in model_card and len(model_card["model_name"]) == 1 and "name" in model_card["model_name"][0]: - # We check if there is only one model information in the model-index - # (This is a safe choice in case there is multiple models in one repo in the future) - metadata.name = model_card["model_name"][0].get("name") - elif "model_name" in model_card: - # non huggingface model card standard but notice some model creator using it - metadata.name = model_card.get("model_name") + + if "model_name" in model_card: + # Not part of huggingface model card standard but notice some model 
creator using it + # such as TheBloke who would encode 'Mixtral 8X7B Instruct v0.1' into model_name + metadata.name = model_card.get("model_name") + + if "base_model" in model_card: + # Not part of huggingface model card standard but notice some model creator using it + # such as TheBloke who would encode 'mistralai/Mixtral-8x7B-Instruct-v0.1' into base_model + model_id = model_card.get("base_model") + model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id) + + if metadata.name is None and model_name_normal is not None: + metadata.name = model_name_normal + if metadata.organization is None and organization_name is not None: + metadata.organization = organization_name + if metadata.basename is None and base_name is not None: + metadata.basename = base_name + if metadata.finetune is None and fine_tune is not None: + metadata.finetune = fine_tune + if metadata.version is None and version_string is not None: + metadata.version = version_string + if metadata.parameter_weight_class is None and parameter_weight_class is not None: + metadata.parameter_weight_class = parameter_weight_class + + if "model-index" in model_card and len(model_card["model-index"]) == 1 and "name" in model_card["model-index"][0]: + # This is a model index which has model id that can be extracted into organization and model name + # if so then we can safely extract organization and name + # (This is a safe choice in case there is multiple models in one repo in the future) + model_id = model_card["model-index"][0].get("name") + model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id) + + if metadata.name is None and model_name_normal is not None: + metadata.name = model_name_normal + if metadata.organization is None and organization_name is not None: + metadata.organization = organization_name + if metadata.basename is None
and base_name is not None: + metadata.basename = base_name + if metadata.finetune is None and fine_tune is not None: + metadata.finetune = fine_tune + if metadata.version is None and version_string is not None: + metadata.version = version_string + if metadata.parameter_weight_class is None and parameter_weight_class is not None: + metadata.parameter_weight_class = parameter_weight_class + + if metadata.quantized_by is None: + # Not part of hugging face model card standard, but is used by TheBloke to credit them self for quantizing 3rd party models + metadata.quantized_by = model_card.get("quantized_by") if metadata.license is None: metadata.license = model_card.get("license") if metadata.license_name is None: @@ -82,24 +124,25 @@ class Metadata: # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 - metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 - metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 - metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 - metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 - metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 - metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 - metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 - metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 - metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 - metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , 
metadata.license_name ) # noqa: E202 - metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 - metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 - metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 - metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class) # noqa: E202 - metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 - metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 - metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 + metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 + metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 + metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 + metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 + metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY , metadata.quantized_by ) # noqa: E202 + metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 + metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 + metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 + metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 + metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 + metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 + metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , 
metadata.license_name ) # noqa: E202 + metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 + metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 + metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 + metadata.parameter_weight_class = metadata_override.get(Keys.General.PARAMETER_WEIGHT_CLASS, metadata.parameter_weight_class) # noqa: E202 + metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 + metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 + metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 # Direct Metadata Override (via direct cli argument) if model_name is not None: @@ -108,7 +151,7 @@ class Metadata: return metadata @staticmethod - def load_metadata_override(metadata_override_path: Optional[Path]): + def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, object]: if metadata_override_path is None or not metadata_override_path.exists(): return {} @@ -116,7 +159,7 @@ class Metadata: return json.load(f) @staticmethod - def load_model_card(model_path: Optional[Path]): + def load_model_card(model_path: Optional[Path] = None) -> dict[str, object]: if model_path is None or not model_path.exists(): return {} @@ -129,7 +172,7 @@ class Metadata: return frontmatter.load(f) @staticmethod - def load_huggingface_parameters(model_path: Optional[Path]): + def load_huggingface_parameters(model_path: Optional[Path] = None) -> dict[str, object]: if model_path is None or not model_path.exists(): return {} @@ -140,3 +183,50 @@ class Metadata: with open(config_path, "r", encoding="utf-8") as f: return json.load(f) + + @staticmethod + def get_model_name_components(model_identifier: Optional[str] = None) -> dict[str, object]: + # Huggingface 
often store model id + + if model_identifier is None: + # model ID missing + return None, None, None, None, None, None + + if ' ' in model_identifier: + # model ID is actually a normal human sentence + # which means its most likely a normal model name only + # not part of the hugging face naming standard, but whatever + return model_identifier, None, None, None, None, None + + if '/' in model_identifier: + # model ID (huggingface style) + organization, model = model_identifier.split('/', 1) + else: + # model ID but missing org components + model = model_identifier + organization = None + + # Apply formatting to organization and model_name + # 'stable-diffusion-xl-base-1.0' --> 'Stable Diffusion Xl Base 1.0' + + organization_name = organization.strip().replace('-', ' ').title() if organization is not None else None + model_name_normal = model.strip().replace('-', ' ').title() if model is not None else None + + # Regular expression to extract model name components + # Heuristic to match against cases such as 'Mixtral-8x7B-Instruct-v0.1' or 'Codestral-22B-v0.1' + + regex_match = re.compile(r'^(?P<base_name>[A-Za-z0-9\s]*(?:(?:-[A-Za-z\s][A-Za-z0-9\s]*)*))' + r'(?:-(?P<parameter_weight_class>(?:\d+x)?\d+[A-Za-z]+))?' + r'(?:-(?P<fine_tune>[A-Za-z0-9\s-]+))?'
+ r'(?:-(?P<version_string>v\d+(?:\.\d+)*))?$').match(model) + + if not regex_match: + return model_name_normal, organization_name, None, None, None, None + + components = regex_match.groupdict() + base_name = components.get("base_name") + fine_tune = components.get("fine_tune") + version_string = components.get("version_string") + parameter_weight_class = components.get("parameter_weight_class") + + return model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 429a590b2..6b50f7e45 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -34,7 +34,7 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: return f"{round(scaled_model_params)}{scale_suffix}" -def parameter_size_class(expert_count_int:int, model_params_count: int) -> str: +def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str: per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) if expert_count_int is not None and expert_count_int > 0: @@ -49,9 +49,9 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: - name = base_name.strip().title().replace(' ', '-') + name = base_name.strip().title().replace(' ', '-').replace('/', '-') elif model_name is not None: - name = model_name.strip().title().replace(' ', '-') + name = model_name.strip().title().replace(' ', '-').replace('/', '-') else: name = "ggml-model"