diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 6af3f243a..306930180 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -60,19 +60,19 @@ class Model:
     tensor_map: gguf.TensorNameMap
     tensor_names: set[str] | None
     gguf_writer: gguf.GGUFWriter
-    metadata: gguf.Metadata
+    model_name: str | None
+    metadata_override: Path | None
 
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
 
-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | None, is_big_endian: bool = False, use_temp_file: bool = False, eager: bool = False, metadata: gguf.Metadata | None = None,
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | None, is_big_endian: bool = False,
+                 use_temp_file: bool = False, eager: bool = False,
+                 metadata_override: Path | None = None, model_name: str | None = None,
                  split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
 
-        if metadata is None:
-            metadata = gguf.Metadata()
-
         self.dir_model = dir_model
         self.ftype = ftype
         self.fname_out = fname_out
@@ -88,7 +88,8 @@ class Model:
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
         self.tensor_names = None
-        self.metadata = metadata
+        self.metadata_override = metadata_override
+        self.model_name = model_name
 
         # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
         if self.ftype == gguf.LlamaFileType.GUESSED:
@@ -337,18 +338,22 @@ class Model:
 
             self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)
 
+    def set_type(self):
+        self.gguf_writer.add_type(gguf.GGUFType.MODEL)
+
     def prepare_metadata(self, vocab_only: bool):
+        total_params, shared_params, expert_params, expert_count = self.gguf_writer.get_total_parameter_count()
+
+        self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, total_params)
+
         # Fallback to model directory name if metadata name is still missing
         if self.metadata.name is None:
            self.metadata.name = self.dir_model.name
 
         # Generate parameter weight class (useful for leader boards) if not yet determined
-        if self.metadata.size_label is None:
-            total_params, shared_params, expert_params, expert_count = self.gguf_writer.get_total_parameter_count()
-
-            if (total_params > 0):
-                self.metadata.size_label = gguf.size_label(total_params, shared_params, expert_params, expert_count)
+        if self.metadata.size_label is None and total_params > 0:
+            self.metadata.size_label = gguf.size_label(total_params, shared_params, expert_params, expert_count)
 
         # Filename Output
         if self.fname_out is not None and not self.fname_out.is_dir():
@@ -383,11 +388,12 @@ class Model:
             # output in the same directory as the model by default
             self.fname_out = self.dir_model / f"{fname_default}.gguf"
 
+        self.set_type()
+
         logger.info("Set meta model")
         self.metadata.set_gguf_meta_model(self.gguf_writer)
 
         logger.info("Set model parameters")
-        self.gguf_writer.add_type(gguf.GGUFType.MODEL)
         self.set_gguf_parameters()
 
         logger.info("Set model tokenizer")
@@ -3607,11 +3613,8 @@ def main() -> None:
     else:
         logging.basicConfig(level=logging.INFO)
 
-    model_name = args.model_name
     dir_model = args.model
 
-    metadata = gguf.Metadata.load(args.metadata, dir_model, model_name)
-
     if not dir_model.is_dir():
         logger.error(f'Error: {args.model} is not a directory')
         sys.exit(1)
@@ -3650,7 +3653,9 @@ def main() -> None:
 
         model_instance = model_class(dir_model=dir_model, ftype=output_type, fname_out=fname_out,
                                      is_big_endian=args.bigendian, use_temp_file=args.use_temp_file,
-                                     eager=args.no_lazy, metadata=metadata, split_max_tensors=args.split_max_tensors,
+                                     eager=args.no_lazy,
+                                     metadata_override=args.metadata, model_name=args.model_name,
+                                     split_max_tensors=args.split_max_tensors,
                                      split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
                                      small_first_shard=args.no_tensor_first_split)
diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index f5e9fec28..66e8da37c 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -251,6 +251,10 @@ def parse_args() -> argparse.Namespace:
         "--verbose", action="store_true",
         help="increase output verbosity",
     )
+    parser.add_argument(
+        "--dry-run", action="store_true",
+        help="only print out what will be done, without writing any new files",
+    )
     parser.add_argument(
         "--base", type=Path, required=True,
         help="directory containing base model file",
@@ -300,6 +304,12 @@ if __name__ == '__main__':
     # load base model
     logger.info(f"Loading base model: {dir_base_model.name}")
     hparams = Model.load_hparams(dir_base_model)
+
+    with open(lora_config, "r") as f:
+        lparams: dict[str, Any] = json.load(f)
+
+    alpha: float = lparams["lora_alpha"]
+
     with torch.inference_mode():
         try:
             model_class = Model.from_model_architecture(hparams["architectures"][0])
@@ -310,6 +320,14 @@ if __name__ == '__main__':
         class LoraModel(model_class):
             model_arch = model_class.model_arch
 
+            def set_type(self):
+                self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
+                self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
+
+            def set_gguf_parameters(self):
+                self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
+                super().set_gguf_parameters()
+
             def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                 tensor_map: dict[str, PartialLoraTensor] = {}
 
@@ -356,18 +374,10 @@ if __name__ == '__main__':
             fname_out,
             is_big_endian=args.bigendian,
             use_temp_file=False,
-            eager=args.no_lazy
+            eager=args.no_lazy,
+            dry_run=args.dry_run,
         )
 
-        with open(lora_config, "r") as f:
-            lparams: dict[str, Any] = json.load(f)
-
-        alpha = lparams["lora_alpha"]
-
-        model_instance.gguf_writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)
-        model_instance.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
-        model_instance.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
-        model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
         logger.info("Exporting model...")
         model_instance.write()
         logger.info(f"Model successfully exported to {model_instance.fname_out}")
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index f0f029a18..8542b3adb 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -7,6 +7,7 @@ import struct
 import tempfile
 from dataclasses import dataclass
 from enum import Enum, auto
+from math import prod
 from pathlib import Path
 from io import BufferedWriter
 from typing import IO, Any, Sequence, Mapping
@@ -17,7 +18,6 @@ import numpy as np
 from .constants import (
     GGUF_DEFAULT_ALIGNMENT,
     GGUF_MAGIC,
-    GGML_QUANT_SIZES,
     GGUF_VERSION,
     GGMLQuantizationType,
     GGUFEndian,
@@ -115,16 +115,29 @@ class GGUFWriter:
         expert_sum = 0
         n_expert_tensors = 0
 
+        last_lora_a: tuple[str, TensorInfo] | None = None
+
         for tensors in self.tensors:
             for name, info in tensors.items():
-                block_size, type_size = GGML_QUANT_SIZES[info.dtype]
+                shape = info.shape
 
-                size = (info.nbytes // type_size) * block_size
+                if name.endswith(".lora_a"):
+                    last_lora_a = (name, info)
+                    continue
+                elif name.endswith(".lora_b"):
+                    if last_lora_a is None or last_lora_a[0] != name[:-1] + "a":
+                        # Bail when the LoRA pair can't be found trivially
+                        logger.warning("can't measure LoRA size correctly, tensor order is unusual")
+                        return 0, 0, 0, 0
+                    else:
+                        shape = (*shape[:-1], last_lora_a[1].shape[-1])
+
+                size = prod(shape)
 
                 if "_exps." in name:
-                    expert_params += (size // info.shape[-3])
-                    expert_sum += info.shape[-3]
+                    expert_params += (size // shape[-3])
+                    expert_sum += shape[-3]
                     n_expert_tensors += 1
                 else:
                     shared_params += size
diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py
index 7d6478189..8b599160d 100644
--- a/gguf-py/gguf/metadata.py
+++ b/gguf-py/gguf/metadata.py
@@ -5,7 +5,7 @@ import json
 import yaml
 import logging
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Literal, Optional
 from dataclasses import dataclass
 
 from .constants import Keys
@@ -44,7 +44,7 @@ class Metadata:
     datasets: Optional[list[str]] = None
 
     @staticmethod
-    def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None) -> Metadata:
+    def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
         # This grabs as many contextual authorship metadata as possible from the model repository
         # making any conversion as required to match the gguf kv store metadata format
         # as well as giving users the ability to override any authorship metadata that may be incorrect
@@ -56,7 +56,7 @@ class Metadata:
         hf_params = Metadata.load_hf_parameters(model_path)
 
         # heuristics
-        metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path)
+        metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)
 
         # Metadata Override File Provided
         # This is based on LLM_KV_NAMES mapping in llama.cpp
@@ -150,7 +150,7 @@ class Metadata:
         return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()])
 
     @staticmethod
-    def get_model_id_components(model_id: Optional[str] = None) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
+    def get_model_id_components(model_id: Optional[str] = None, total_params: int = 0) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
         # Huggingface often store model id as '<org>/<model name>'
         # so let's parse it and apply some heuristics if possible for model name components
@@ -175,35 +175,82 @@
         if org_component is not None and org_component[0] == '.':
             org_component = None
 
-        # Regular expression to extract model name components
-        # Heuristic to match against cases such as 'Mixtral-8x7B-Instruct-v0.1' or 'Codestral-22B-v0.1'
-        regex_match = re.compile(r'^'
-                                 r'(?P<basename>[A-Za-z0-9\s]*(?:(?:-(?:(?:[A-Za-z\s][A-Za-z0-9\s]*)|(?:[0-9\s]*)))*))'
-                                 r'(?:-(?P<size_label>(?:\d+x)?(\d+\.)?\d+[A-Za-z](?:-[A-Za-z]+(\d+\.)?\d+[A-Za-z]+)?)(?:-(?P<finetune>[A-Za-z0-9\s-]+))?)?'
-                                 r'(?:-(?P<version>v\d+(?:\.\d+)*))?'
-                                 r'$').match(model_full_name_component)
+        name_parts: list[str] = model_full_name_component.split('-')
+        name_types: list[
+            set[Literal["basename", "size_label", "finetune", "version", "type"]]
+        ] = [set() for _ in name_parts]
 
-        if not regex_match:
-            return model_full_name_component, org_component, None, None, None, None
+        # Annotate the name
+        for i, part in enumerate(name_parts):
+            # Version
+            if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE):
+                name_types[i].add("version")
+            # Quant type (should not be there for base models, but still annotated)
+            elif re.fullmatch(r'[iI]?[qQ]\d(_\w)*', part):
+                name_types[i].add("type")
+                name_parts[i] = part.upper()
+            # Model size
+            elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[kMBT]|small|mini|medium|large|xl)', part, re.IGNORECASE):
+                part = part.replace("_", ".")
+                if len(part) > 1 and part[-2].isdecimal():
+                    if part[-1] in "mbt":
+                        part = part[:-1] + part[-1].upper()
+                    elif part[-1] in "k":
+                        part = part[:-1] + part[-1].lower()
+                if total_params > 0:
+                    try:
+                        label_params = float(part[:-1]) * pow(1000, " kMBT".find(part[-1]))
+                        # Only use it as a size label if it's close or bigger than the model size
+                        # Note that LoRA adapters don't necessarily include all layers,
+                        # so this is why bigger label sizes are accepted.
+                        # Do not use the size label when it's smaller than 3/4 of the model size
+                        if total_params - label_params > total_params // 4:
+                            # Likely a context length
+                            name_types[i].add("finetune")
+                    except ValueError:
+                        # Failed to convert the size label to float, use it anyway
+                        pass
+                if len(name_types[i]) == 0:
+                    name_types[i].add("size_label")
+                name_parts[i] = part
+            # Some easy to recognize finetune names
+            elif i > 0 and re.fullmatch(r'chat|instruct|vision', part, re.IGNORECASE):
+                name_types[i].add("finetune")
 
-        components = regex_match.groupdict()
-        basename = components.get("basename")
-        size_label = components.get("size_label")
-        finetune = components.get("finetune")
-        version = components.get("version")
+        at_start = True
+        # Find the basename through the annotated name
+        for part, t in zip(name_parts, name_types):
+            if at_start and ((len(t) == 0 and part[0].isalpha()) or "version" in t):
+                t.add("basename")
+            else:
+                if at_start:
+                    at_start = False
+                if len(t) == 0:
+                    t.add("finetune")
 
-        # Base name required at a minimum
-        if basename is None:
-            return model_full_name_component, None, None, None, None, None
+        # Remove the basename annotation from trailing version
+        for part, t in zip(reversed(name_parts), reversed(name_types)):
+            if "basename" in t:
+                if len(t) > 1:
+                    t.remove("basename")
+            else:
+                break
 
-        # Need to capture at least one component that is not basename
-        if size_label is None and version is None and finetune is None:
-            return model_full_name_component, None, None, None, None, None
+        basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None
+        size_label = "-".join(s for s, t in zip(name_parts, name_types) if "size_label" in t) or None
+        finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None
+        # TODO: should the basename version always be excluded?
+        # TODO: should multiple versions be joined together?
+        version = ([v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t] or [None])[-1]
+
+        if size_label is None and finetune is None and version is None:
+            # Too ambiguous, output nothing
+            basename = None
 
         return model_full_name_component, org_component, basename, finetune, version, size_label
 
     @staticmethod
-    def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None) -> Metadata:
+    def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None, total_params: int = 0) -> Metadata:
         # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
 
         # Model Card Heuristics
@@ -242,7 +289,8 @@ class Metadata:
                 metadata.base_models = []
 
             for model_id in metadata_base_models:
-                model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id)
+                # NOTE: model size of base model is assumed to be similar to the size of the current model
+                model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
                 base_model = {}
                 if model_full_name_component is not None:
                     base_model["name"] = Metadata.id_to_title(model_full_name_component)
@@ -317,7 +365,7 @@ class Metadata:
                 # Use _name_or_path only if its actually a model name and not some computer path
                 # e.g. 'meta-llama/Llama-2-7b-hf'
                 model_id = hf_name_or_path
-                model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id)
+                model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
                 if metadata.name is None and model_full_name_component is not None:
                     metadata.name = Metadata.id_to_title(model_full_name_component)
                 if metadata.organization is None and org_component is not None:
@@ -335,7 +383,7 @@ class Metadata:
         ############################################
         if model_path is not None:
             model_id = model_path.name
-            model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id)
+            model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
             if metadata.name is None and model_full_name_component is not None:
                 metadata.name = Metadata.id_to_title(model_full_name_component)
             if metadata.organization is None and org_component is not None:
diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py
index 2c25e85d4..9d048603b 100755
--- a/gguf-py/tests/test_metadata.py
+++ b/gguf-py/tests/test_metadata.py
@@ -1,9 +1,15 @@
 #!/usr/bin/env python3
 
 import unittest
-import gguf # noqa: F401
 from pathlib import Path
+import os
+import sys
+
+# Necessary to load the local gguf package
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent))
+
+import gguf
 
 
 class TestMetadataMethod(unittest.TestCase):
@@ -49,7 +55,7 @@ class TestMetadataMethod(unittest.TestCase):
 
         # Can't detect all non standard form in a heuristically safe way... best to err in caution and output nothing...
         self.assertEqual(gguf.Metadata.get_model_id_components("Qwen1.5-MoE-A2.7B-Chat"),
-                         ('Qwen1.5-MoE-A2.7B-Chat', None, None, None, None, None))
+                         ('Qwen1.5-MoE-A2.7B-Chat', None, 'Qwen1.5-MoE', 'Chat', None, 'A2.7B'))
 
         # Capture 'sub size labels' e.g. A14B in '57B-A14B' usually refers to activated params/weight count
         self.assertEqual(gguf.Metadata.get_model_id_components("Qwen2-57B-A14B-Instruct"),
@@ -57,26 +63,29 @@ class TestMetadataMethod(unittest.TestCase):
 
         # Check that it can handle a real model id with no version code
         # Note that 4k in this string is non standard and microsoft were referring to context length rather than weight count
-        self.assertEqual(gguf.Metadata.get_model_id_components("microsoft/Phi-3-mini-4k-instruct"),
-                         ('Phi-3-mini-4k-instruct', 'microsoft', 'Phi-3-mini', 'instruct', None, '4k'))
+        self.assertEqual(gguf.Metadata.get_model_id_components("microsoft/Phi-3-mini-4k-instruct", 4*10**9),
+                         ('Phi-3-mini-4k-instruct', 'microsoft', 'Phi-3', '4k-instruct', None, 'mini'))
 
         # There is some legitimate models with only thousands of parameters
-        self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k"),
+        self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k", 50*10**3),
                          ('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50k'))
 
-        # None standard and not easy to disambiguate, best to err in caution and output nothing
+        # None standard and not easy to disambiguate
         self.assertEqual(gguf.Metadata.get_model_id_components("DeepSeek-Coder-V2-Lite-Instruct"),
-                         ('DeepSeek-Coder-V2-Lite-Instruct', None, None, None, None, None))
+                         ('DeepSeek-Coder-V2-Lite-Instruct', None, 'DeepSeek-Coder-V2-Lite', 'Instruct', None, None))
 
         # This is a real model_id where they append 2DPO to refer to Direct Preference Optimization
-        # Not able to easily reject '2dpo' while keeping to simple regexp, so best to reject
         self.assertEqual(gguf.Metadata.get_model_id_components("crestf411/daybreak-kunoichi-2dpo-7b"),
-                         ('daybreak-kunoichi-2dpo-7b', 'crestf411', None, None, None, None))
+                         ('daybreak-kunoichi-2dpo-7b', 'crestf411', 'daybreak-kunoichi', '2dpo', None, '7B'))
 
         # This is a real model id where the weight size has a decimal point
         self.assertEqual(gguf.Metadata.get_model_id_components("Qwen2-0.5B-Instruct"),
                          ('Qwen2-0.5B-Instruct', None, 'Qwen2', 'Instruct', None, '0.5B'))
 
+        # Uses an underscore in the size label
+        self.assertEqual(gguf.Metadata.get_model_id_components("smallcloudai/Refact-1_6B-fim"),
+                         ('Refact-1_6B-fim', 'smallcloudai', 'Refact', 'fim', None, '1.6B'))
+
     def test_apply_metadata_heuristic_from_model_card(self):
         model_card = {
             'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'],
@@ -88,7 +97,7 @@ class TestMetadataMethod(unittest.TestCase):
         }
         got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None)
         expect = gguf.Metadata()
-        expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1'}]
+        expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'version': 'v0', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'version': 'v1', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}]
         expect.tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl']
         expect.languages=['en']
         expect.datasets=['teknium/OpenHermes-2.5']
@@ -97,12 +106,15 @@ class TestMetadataMethod(unittest.TestCase):
 
     def test_apply_metadata_heuristic_from_hf_parameters(self):
         hf_params = {"_name_or_path": "./hermes-2-pro-llama-3-8b-DPO"}
-        got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), None, hf_params, None)
-        expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, quantized_by=None, size_label='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None)
+        got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card=None, hf_params=hf_params, model_path=None)
+        expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', finetune='DPO', basename='hermes-2-pro-llama-3', size_label='8B')
         self.assertEqual(got, expect)
 
     def test_apply_metadata_heuristic_from_model_dir(self):
         model_dir_path = Path("./hermes-2-pro-llama-3-8b-DPO")
-        got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), None, None, model_dir_path)
-        expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, quantized_by=None, size_label='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None)
+        got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card=None, hf_params=None, model_path=model_dir_path)
+        expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', finetune='DPO', basename='hermes-2-pro-llama-3', size_label='8B')
         self.assertEqual(got, expect)
+
+if __name__ == "__main__":
+    unittest.main()
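
Note (reviewer sketch, not part of the patch): a minimal usage example of the reworked name heuristic, assuming the gguf-py package from this branch is importable. The expected tuples are taken directly from the updated test cases above; the order is (name, org, basename, finetune, version, size_label).

    import gguf

    # Without a parameter count, each part of the id is classified from its shape alone.
    print(gguf.Metadata.get_model_id_components("Qwen2-0.5B-Instruct"))
    # ('Qwen2-0.5B-Instruct', None, 'Qwen2', 'Instruct', None, '0.5B')

    # Passing total_params lets the heuristic reject '4k' as a size label:
    # 4000 is far below the ~4B actual parameter count, so it is folded into the finetune.
    print(gguf.Metadata.get_model_id_components("microsoft/Phi-3-mini-4k-instruct", 4*10**9))
    # ('Phi-3-mini-4k-instruct', 'microsoft', 'Phi-3', '4k-instruct', None, 'mini')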
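Likewise, a small self-contained sketch (hypothetical tensor names and shapes, not from a real adapter) of why get_total_parameter_count now pairs .lora_a/.lora_b tensors before taking prod(shape): the adapter's effective weight delta is the product of the two low-rank factors, so its parameter count comes from the outer dimensions of the pair rather than from the factors themselves.

    from math import prod

    # Hypothetical LoRA pair for a single weight, rank 16.
    tensors = {
        "blk.0.attn_q.weight.lora_a": (16, 4096),   # (rank, n_in)
        "blk.0.attn_q.weight.lora_b": (4096, 16),   # (n_out, rank)
    }

    last_lora_a = None
    total = 0
    for name, shape in tensors.items():
        if name.endswith(".lora_a"):
            # Defer counting until the matching .lora_b is seen.
            last_lora_a = (name, shape)
            continue
        if name.endswith(".lora_b") and last_lora_a is not None:
            # Combine into the shape of the full delta: (n_out, n_in).
            shape = (*shape[:-1], last_lora_a[1][-1])
        total += prod(shape)

    print(total)  # 16777216, i.e. 4096 * 4096, not 2 * 16 * 4096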