Review notes (free text ahead of the first "diff --git" header; patch tools skip it):
  * gguf-py/gguf/metadata.py: Metadata.load() tested for the "model-index" key
    but then indexed model_card["model_name"], raising KeyError for cards that
    only carry "model-index" and never reading the name it was looking for.
    The lookup now uses "model-index" consistently.
  * examples/convert_legacy_llama.py: Metadata.load() now requires model_path,
    so the legacy converter must pass dir_model or it fails with TypeError.
  * Line structure restored; leading indentation and column alignment of the
    override assignments were reconstructed and should be checked against the
    target tree. The "index" blob hashes were not recomputed for the fixes.

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 296895449..7cfcb6106 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -57,7 +57,6 @@ class Model:
     lazy: bool
     part_names: list[str]
     is_safetensors: bool
-    model_card: dict[str, Any]
     hparams: dict[str, Any]
     block_count: int
     tensor_map: gguf.TensorNameMap
@@ -85,7 +84,6 @@ class Model:
         self.is_safetensors = len(self.part_names) > 0
         if not self.is_safetensors:
             self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
-        self.model_card = Model.load_model_card(dir_model)
         self.hparams = Model.load_hparams(self.dir_model)
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
@@ -249,8 +247,8 @@ class Model:
             self.gguf_writer.add_url(self.metadata.url)
         if self.metadata.description is not None:
             self.gguf_writer.add_description(self.metadata.description)
-        if self.metadata.licence is not None:
-            self.gguf_writer.add_licence(self.metadata.licence)
+        if self.metadata.license is not None:
+            self.gguf_writer.add_license(self.metadata.license)
         if self.metadata.source_url is not None:
             self.gguf_writer.add_source_url(self.metadata.source_url)
         if self.metadata.source_hf_repo is not None:
@@ -439,11 +437,6 @@ class Model:
 
         return part_names
 
-    @staticmethod
-    def load_model_card(dir_model: Path):
-        with open(dir_model / "README.md", "r", encoding="utf-8") as f:
-            return frontmatter.load(f)
-
     @staticmethod
     def load_hparams(dir_model: Path):
         with open(dir_model / "config.json", "r", encoding="utf-8") as f:
@@ -3611,9 +3604,10 @@ def main() -> None:
     else:
         logging.basicConfig(level=logging.INFO)
 
-    metadata = gguf.Metadata.load(args.metadata)
     dir_model = args.model
 
+    metadata = gguf.Metadata.load(args.metadata, dir_model)
+
     if not dir_model.is_dir():
         logger.error(f'Error: {args.model} is not a directory')
         sys.exit(1)
diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py
index fe18d5970..5827da530 100755
--- a/examples/convert_legacy_llama.py
+++ b/examples/convert_legacy_llama.py
@@ -1260,10 +1260,12 @@ def main(args_in: list[str] | None = None) -> None:
     else:
         logging.basicConfig(level=logging.INFO)
 
-    metadata = gguf.Metadata.load(args.metadata)
+    dir_model = args.model
+
+    metadata = gguf.Metadata.load(args.metadata, dir_model)
 
     if args.get_outfile:
-        model_plus = load_some_model(args.model)
+        model_plus = load_some_model(dir_model)
         params = Params.load(model_plus)
         model = convert_model_names(model_plus.model, params, args.skip_unknown)
         model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts)
@@ -1275,14 +1277,14 @@ def main(args_in: list[str] | None = None) -> None:
         raise ValueError("--vocab-only does not make sense with --no-vocab")
 
     if args.dump_single:
-        model_plus = lazy_load_file(args.model)
+        model_plus = lazy_load_file(dir_model)
         do_dump_model(model_plus)
         return
 
     if not args.vocab_only:
-        model_plus = load_some_model(args.model)
+        model_plus = load_some_model(dir_model)
     else:
-        model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
+        model_plus = ModelPlus(model = {}, paths = [dir_model / 'dummy'], format = 'none', vocab = None)
 
     model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts)
     logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})")
@@ -1318,7 +1320,7 @@ def main(args_in: list[str] | None = None) -> None:
     logger.info(f"params = {params}")
 
     model_parent_path = model_plus.paths[0].parent
-    vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
+    vocab_path = Path(args.vocab_dir or dir_model or model_parent_path)
     vocab_factory = VocabFactory(vocab_path)
     vocab_types = None if args.no_vocab else args.vocab_type.split(",")
     vocab, special_vocab = vocab_factory.load_vocab(vocab_types, model_parent_path)
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 69360a4fb..a0d94a8f6 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -31,6 +31,8 @@ class Keys:
         URL = "general.url"
         DESCRIPTION = "general.description"
         LICENSE = "general.license"
+        LICENSE_NAME = "general.license.name"
+        LICENSE_LINK = "general.license.link"
         SOURCE_URL = "general.source.url"
         SOURCE_HF_REPO = "general.source.huggingface.repository"
         FILE_TYPE = "general.file_type"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index a02882f8f..5429d5c55 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -454,6 +454,12 @@ class GGUFWriter:
     def add_license(self, license: str) -> None:
         self.add_string(Keys.General.LICENSE, license)
 
+    def add_license_name(self, license: str) -> None:
+        self.add_string(Keys.General.LICENSE_NAME, license)
+
+    def add_license_link(self, license: str) -> None:
+        self.add_string(Keys.General.LICENSE_LINK, license)
+
     def add_source_url(self, url: str) -> None:
         self.add_string(Keys.General.SOURCE_URL, url)
 
diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py
index 0d175605a..a20d2fde8 100644
--- a/gguf-py/gguf/metadata.py
+++ b/gguf-py/gguf/metadata.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+import frontmatter
 from pathlib import Path
 
 from typing import Optional
@@ -11,6 +12,7 @@ from .constants import Keys
 
 @dataclass
 class Metadata:
+    # Authorship Metadata to be written to GGUF KV Store
     name: Optional[str] = None
     basename: Optional[str] = None
     finetune: Optional[str] = None
@@ -18,32 +20,98 @@ class Metadata:
     version: Optional[str] = None
     url: Optional[str] = None
     description: Optional[str] = None
-    licence: Optional[str] = None
+    license: Optional[str] = None
+    license_name: Optional[str] = None
+    license_link: Optional[str] = None
     source_url: Optional[str] = None
     source_hf_repo: Optional[str] = None
 
     @staticmethod
-    def load(metadata_path: Path) -> Metadata:
-        if metadata_path is None or not metadata_path.exists():
-            return Metadata()
-
-        with open(metadata_path, 'r') as file:
-            data = json.load(file)
+    def load(metadata_override_path: Path, model_path: Path) -> Metadata:
+        # This grabs as many contextual authorship metadata as possible from the model repository
+        # making any conversion as required to match the gguf kv store metadata format
+        # as well as giving users the ability to override any authorship metadata that may be incorrect
 
         # Create a new Metadata instance
         metadata = Metadata()
 
-        # Assigning values to Metadata attributes if they exist in the JSON file
+        # load model folder model card if available
+        # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
+        model_card = Metadata.load_model_card(model_path)
+        if metadata.name is None:
+            if "model-index" in model_card and len(model_card["model-index"]) == 1 and "name" in model_card["model-index"][0]:
+                metadata.name = model_card["model-index"][0].get("name")
+            elif "model_name" in model_card:
+                # non huggingface model card standard but notice some model creator using it
+                metadata.name = model_card.get("model_name")
+        if metadata.license is None:
+            metadata.license = model_card.get("license")
+        if metadata.license_name is None:
+            metadata.license_name = model_card.get("license_name")
+        if metadata.license_link is None:
+            metadata.license_link = model_card.get("license_link")
+
+        # load huggingface parameters if available
+        hf_params = Metadata.load_huggingface_parameters(model_path)
+        hf_name_or_path = hf_params.get("_name_or_path")
+        if metadata.name is None and hf_name_or_path is not None:
+            metadata.name = Path(hf_name_or_path).name
+        if metadata.source_hf_repo is None and hf_name_or_path is not None:
+            metadata.source_hf_repo = Path(hf_name_or_path).name
+
+        # Use Directory Folder Name As Fallback Name
+        if metadata.name is None:
+            if model_path is not None and model_path.exists():
+                metadata.name = model_path.name
+
+        # Metadata Override
         # This is based on LLM_KV_NAMES mapping in llama.cpp
-        metadata.name = data.get(Keys.General.NAME)
-        metadata.basename = data.get(Keys.General.BASENAME)
-        metadata.finetune = data.get(Keys.General.FINETUNE)
-        metadata.author = data.get(Keys.General.AUTHOR)
-        metadata.version = data.get(Keys.General.VERSION)
-        metadata.url = data.get(Keys.General.URL)
-        metadata.description = data.get(Keys.General.DESCRIPTION)
-        metadata.license = data.get(Keys.General.LICENSE)
-        metadata.source_url = data.get(Keys.General.SOURCE_URL)
-        metadata.source_hf_repo = data.get(Keys.General.SOURCE_HF_REPO)
+        metadata_override = Metadata.load_metadata_override(metadata_override_path)
+        metadata.name           = metadata_override.get(Keys.General.NAME          , metadata.name          ) # noqa: E202
+        metadata.basename       = metadata_override.get(Keys.General.BASENAME      , metadata.basename      ) # noqa: E202
+        metadata.finetune       = metadata_override.get(Keys.General.FINETUNE      , metadata.finetune      ) # noqa: E202
+        metadata.author         = metadata_override.get(Keys.General.AUTHOR        , metadata.author        ) # noqa: E202
+        metadata.version        = metadata_override.get(Keys.General.VERSION       , metadata.version       ) # noqa: E202
+        metadata.url            = metadata_override.get(Keys.General.URL           , metadata.url           ) # noqa: E202
+        metadata.description    = metadata_override.get(Keys.General.DESCRIPTION   , metadata.description   ) # noqa: E202
+        metadata.license        = metadata_override.get(Keys.General.LICENSE       , metadata.license       ) # noqa: E202
+        metadata.license_name   = metadata_override.get(Keys.General.LICENSE_NAME  , metadata.license_name  ) # noqa: E202
+        metadata.license_link   = metadata_override.get(Keys.General.LICENSE_LINK  , metadata.license_link  ) # noqa: E202
+        metadata.source_url     = metadata_override.get(Keys.General.SOURCE_URL    , metadata.source_url    ) # noqa: E202
+        metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO, metadata.source_hf_repo) # noqa: E202
 
         return metadata
+
+    @staticmethod
+    def load_metadata_override(metadata_override_path: Path):
+        if metadata_override_path is None or not metadata_override_path.exists():
+            return {}
+
+        with open(metadata_override_path, "r", encoding="utf-8") as f:
+            return json.load(f)
+
+    @staticmethod
+    def load_model_card(model_path: Path):
+        if model_path is None or not model_path.exists():
+            return {}
+
+        model_card_path = model_path / "README.md"
+
+        if not model_card_path.exists():
+            return {}
+
+        with open(model_card_path, "r", encoding="utf-8") as f:
+            return frontmatter.load(f)
+
+    @staticmethod
+    def load_huggingface_parameters(model_path: Path):
+        if model_path is None or not model_path.exists():
+            return {}
+
+        config_path = model_path / "config.json"
+
+        if not config_path.exists():
+            return {}
+
+        with open(config_path, "r", encoding="utf-8") as f:
+            return json.load(f)