From b1927eed82be06994cb76fb54b3ab3120356a049 Mon Sep 17 00:00:00 2001
From: brian khuu
Date: Sun, 2 Jun 2024 17:44:53 +1000
Subject: [PATCH] convert-*.py: move per-model weight estimation away from util
 back to the main script, plus some refactoring

---
 convert_hf_to_gguf.py            | 113 +++++++++++++++++--------
 examples/convert_legacy_llama.py |  55 ++++++++-------
 gguf-py/gguf/utility.py          |  31 ---------
 3 files changed, 90 insertions(+), 109 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 245ae94b8..8ff5b067a 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -47,7 +47,6 @@ AnyModel = TypeVar("AnyModel", bound="type[Model]")
 class Model:
     _model_classes: dict[str, type[Model]] = {}
 
-    model_name: str
     dir_model: Path
     ftype: gguf.LlamaFileType
     is_big_endian: bool
@@ -72,6 +71,10 @@ class Model:
                  model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
+
+        if metadata is None:
+            raise TypeError("authorship metadata must be provided")
+
         self.dir_model = dir_model
         self.ftype = ftype
         self.is_big_endian = is_big_endian
@@ -121,16 +124,20 @@ class Model:
-        self.model_name = Model.get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch)
+        # Fallback to model architecture name if metadata name is still missing
+        if self.metadata.name is None:
+            self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch]
+
         # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
         output_type = self.ftype.name.partition("_")[2]
 
         # Update authorship metadata class with parameter size class (useful for leader boards)
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
-        weight_estimate = gguf.per_model_weight_count_estimation(self.get_tensors(), expert_count)
+        weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count)
         self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate)
 
         # Generate default filename based on model specification and available metadata
-        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)
+        self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)
 
         # Filename Output
         if fname_out is not None:
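The net effect of the constructor changes above is that the model name now lives on the metadata object itself, with the architecture name as the final fallback, instead of on a separate Model.model_name attribute. A minimal runnable sketch of that fallback order, using hypothetical stand-ins for gguf.Metadata and gguf.MODEL_ARCH_NAMES:

    from dataclasses import dataclass

    @dataclass
    class Metadata:  # stand-in for gguf.Metadata; only the field used here
        name: str | None = None

    MODEL_ARCH_NAMES = {"llama": "llama"}  # stand-in for gguf.MODEL_ARCH_NAMES

    def resolve_name(metadata: Metadata, model_arch: str) -> str:
        # Fallback to the architecture name if no metadata name was provided
        if metadata.name is None:
            metadata.name = MODEL_ARCH_NAMES[model_arch]
        return metadata.name

    assert resolve_name(Metadata(name="Mixtral-v0.1"), "llama") == "Mixtral-v0.1"
    assert resolve_name(Metadata(), "llama") == "llama"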
@@ -229,37 +236,36 @@ class Model:
         return new_name
 
     def set_gguf_meta_model(self):
-        self.gguf_writer.add_name(self.model_name)
+        self.gguf_writer.add_name(self.metadata.name)
 
-        if self.metadata is not None:
-            if self.metadata.basename is not None:
-                self.gguf_writer.add_basename(self.metadata.basename)
-            if self.metadata.finetune is not None:
-                self.gguf_writer.add_finetune(self.metadata.finetune)
-            if self.metadata.author is not None:
-                self.gguf_writer.add_author(self.metadata.author)
-            if self.metadata.version is not None:
-                self.gguf_writer.add_version(self.metadata.version)
-            if self.metadata.base_version is not None:
-                self.gguf_writer.add_base_version(self.metadata.base_version)
-            if self.metadata.url is not None:
-                self.gguf_writer.add_url(self.metadata.url)
-            if self.metadata.description is not None:
-                self.gguf_writer.add_description(self.metadata.description)
-            if self.metadata.license is not None:
-                self.gguf_writer.add_license(self.metadata.license)
-            if self.metadata.source_url is not None:
-                self.gguf_writer.add_source_url(self.metadata.source_url)
-            if self.metadata.source_hf_repo is not None:
-                self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo)
-            if self.metadata.parameter_size_class is not None:
-                self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class)
-            if self.metadata.tags is not None:
-                self.gguf_writer.add_tags(self.metadata.tags)
-            if self.metadata.languages is not None:
-                self.gguf_writer.add_languages(self.metadata.languages)
-            if self.metadata.datasets is not None:
-                self.gguf_writer.add_datasets(self.metadata.datasets)
+        if self.metadata.basename is not None:
+            self.gguf_writer.add_basename(self.metadata.basename)
+        if self.metadata.finetune is not None:
+            self.gguf_writer.add_finetune(self.metadata.finetune)
+        if self.metadata.author is not None:
+            self.gguf_writer.add_author(self.metadata.author)
+        if self.metadata.version is not None:
+            self.gguf_writer.add_version(self.metadata.version)
+        if self.metadata.base_version is not None:
+            self.gguf_writer.add_base_version(self.metadata.base_version)
+        if self.metadata.url is not None:
+            self.gguf_writer.add_url(self.metadata.url)
+        if self.metadata.description is not None:
+            self.gguf_writer.add_description(self.metadata.description)
+        if self.metadata.license is not None:
+            self.gguf_writer.add_license(self.metadata.license)
+        if self.metadata.source_url is not None:
+            self.gguf_writer.add_source_url(self.metadata.source_url)
+        if self.metadata.source_hf_repo is not None:
+            self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo)
+        if self.metadata.parameter_size_class is not None:
+            self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class)
+        if self.metadata.tags is not None:
+            self.gguf_writer.add_tags(self.metadata.tags)
+        if self.metadata.languages is not None:
+            self.gguf_writer.add_languages(self.metadata.languages)
+        if self.metadata.datasets is not None:
+            self.gguf_writer.add_datasets(self.metadata.datasets)
 
     def set_gguf_parameters(self):
         self.gguf_writer.add_block_count(self.block_count)
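With the metadata object now required at construction time, the per-field guards above no longer need the outer `if self.metadata is not None` check. A minimal sketch of this fail-fast pattern, with hypothetical stand-ins for gguf.Metadata and gguf.GGUFWriter (the "general.author" key name is assumed for illustration):

    from dataclasses import dataclass

    @dataclass
    class Metadata:  # hypothetical stand-in for gguf.Metadata
        author: str | None = None

    class KVWriter:  # hypothetical stand-in for gguf.GGUFWriter
        def __init__(self) -> None:
            self.kv: dict[str, str] = {}

        def add_author(self, author: str) -> None:
            self.kv["general.author"] = author

    def write_meta(writer: KVWriter, metadata: Metadata | None) -> None:
        # Reject missing metadata once, up front, instead of guarding every write
        if metadata is None:
            raise TypeError("authorship metadata must be provided")
        if metadata.author is not None:
            writer.add_author(metadata.author)

    writer = KVWriter()
    write_meta(writer, Metadata(author="brian khuu"))
    assert writer.kv == {"general.author": "brian khuu"}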
@@ -415,23 +421,29 @@ class Model:
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.close()
 
-    # Set model name based on latest metadata either provided or calculated from environment
-    @staticmethod
-    def get_model_name(metadata, huggingface_parameters, dir_model, model_arch):
-        if metadata is not None and metadata.name is not None:
-            # Explicit Metadata Was Provided By User
-            return metadata.name
-        elif huggingface_parameters is not None and "_name_or_path" in huggingface_parameters:
-            # Hugging Face Parameters Model Name or Model Folder Name is Provided
-            return huggingface_parameters["_name_or_path"]
-        elif huggingface_parameters is not None and "model_type" in huggingface_parameters:
-            # Hugging Face Parameters Model Type is Provided
-            return huggingface_parameters["model_type"]
-        elif dir_model is not None and dir_model.name is not None:
-            # Use directory folder name
-            return dir_model.name
-        else:
-            return gguf.MODEL_ARCH_NAMES[model_arch]
+    @staticmethod
+    def per_model_weight_count_estimation(tensors: Iterator[tuple[str, Tensor]], expert_count: int | None) -> int:
+        # TODO: Ensure the parameter count is accurate across the various model types.
+        #       It may currently overestimate the parameter count in Mamba models because
+        #       the output weights are tied with the token embeddings.
+        sum_weight_estimate = 0
+        for name, data_torch in tensors:
+            # We don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+                continue
+
+            # Calculate Tensor Volume
+            sum_weights_in_tensor = 1
+            for dim in data_torch.shape:
+                sum_weights_in_tensor *= dim
+
+            # Add Tensor Volume To Running Count
+            sum_weight_estimate += sum_weights_in_tensor
+
+        # Calculate weight estimate per model
+        per_model_weight_estimate = (sum_weight_estimate // expert_count) if (expert_count is not None and expert_count > 0) else sum_weight_estimate
+
+        return per_model_weight_estimate
 
     @staticmethod
     def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]:
diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py
index 2bf008a6b..7bbd77e47 100755
--- a/examples/convert_legacy_llama.py
+++ b/examples/convert_legacy_llama.py
@@ -773,7 +773,7 @@ class OutputFile:
     def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None:
         # Metadata About The Model And Its Provenence
         name = "LLaMA"
-        if metadata is not None and metadata.name is not None:
+        if metadata.name is not None:
             name = metadata.name
         elif params.path_model is not None:
             name = params.path_model.name
@@ -783,29 +783,28 @@ class OutputFile:
 
         self.gguf.add_name(name)
 
-        if metadata is not None:
-            if metadata.basename is not None:
-                self.gguf.add_basename(metadata.basename)
-            if metadata.finetune is not None:
-                self.gguf.add_finetune(metadata.finetune)
-            if metadata.author is not None:
-                self.gguf.add_author(metadata.author)
-            if metadata.version is not None:
-                self.gguf.add_version(metadata.version)
-            if metadata.base_version is not None:
-                self.gguf.add_base_version(metadata.base_version)
-            if metadata.url is not None:
-                self.gguf.add_url(metadata.url)
-            if metadata.description is not None:
-                self.gguf.add_description(metadata.description)
-            if metadata.license is not None:
-                self.gguf.add_license(metadata.license)
-            if metadata.source_url is not None:
-                self.gguf.add_source_url(metadata.source_url)
-            if metadata.source_hf_repo is not None:
-                self.gguf.add_source_hf_repo(metadata.source_hf_repo)
-            if metadata.tags is not None:
-                self.gguf_writer.add_tags(metadata.tags)
+        if metadata.basename is not None:
+            self.gguf.add_basename(metadata.basename)
+        if metadata.finetune is not None:
+            self.gguf.add_finetune(metadata.finetune)
+        if metadata.author is not None:
+            self.gguf.add_author(metadata.author)
+        if metadata.version is not None:
+            self.gguf.add_version(metadata.version)
+        if metadata.base_version is not None:
+            self.gguf.add_base_version(metadata.base_version)
+        if metadata.url is not None:
+            self.gguf.add_url(metadata.url)
+        if metadata.description is not None:
+            self.gguf.add_description(metadata.description)
+        if metadata.license is not None:
+            self.gguf.add_license(metadata.license)
+        if metadata.source_url is not None:
+            self.gguf.add_source_url(metadata.source_url)
+        if metadata.source_hf_repo is not None:
+            self.gguf.add_source_hf_repo(metadata.source_hf_repo)
+        if metadata.tags is not None:
+            self.gguf.add_tags(metadata.tags)
 
     def add_meta_arch(self, params: Params) -> None:
         # Metadata About The Neural Architecture Itself
@@ -1197,10 +1196,10 @@ class VocabFactory:
 
 def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str:
-    name = metadata.name if metadata is not None and metadata.name is not None else model_name
-    basename = metadata.basename if metadata is not None and metadata.basename is not None else None
-    finetune = metadata.finetune if metadata is not None and metadata.finetune is not None else None
-    version = metadata.version if metadata is not None and metadata.version is not None else None
+    name = metadata.name if metadata.name is not None else model_name
+    basename = metadata.basename if metadata.basename is not None else None
+    finetune = metadata.finetune if metadata.finetune is not None else None
+    version = metadata.version if metadata.version is not None else None
 
     output_type = {
         GGMLFileType.AllF32: "F32",
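As a sanity check on the relocated estimator: it multiplies out each tensor's shape, skips a few non-weight buffers by name suffix, and divides by the expert count for mixture-of-experts models. A runnable sketch of the same logic against stub tensors (any object exposing a .shape attribute stands in for torch.Tensor here):

    from types import SimpleNamespace

    def per_model_weight_count_estimation(tensors, expert_count=None):
        # Mirrors the logic of the method added to convert_hf_to_gguf.py above
        total = 0
        for name, tensor in tensors:
            if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
                continue  # non-weight buffers are excluded from the count
            volume = 1
            for dim in tensor.shape:
                volume *= dim
            total += volume
        return total // expert_count if expert_count is not None and expert_count > 0 else total

    tensors = [
        ("tok_embeddings.weight", SimpleNamespace(shape=(32000, 4096))),
        ("layers.0.rotary_emb.inv_freq", SimpleNamespace(shape=(64,))),  # skipped
    ]
    assert per_model_weight_count_estimation(iter(tensors)) == 32000 * 4096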
diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py
index 0ee3499e2..429a590b2 100644
--- a/gguf-py/gguf/utility.py
+++ b/gguf-py/gguf/utility.py
@@ -1,10 +1,5 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Iterator
-
-if TYPE_CHECKING:
-    from torch import Tensor
-
 
 def fill_templated_filename(filename: str, output_type: str):
     # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
@@ -15,32 +10,6 @@ def fill_templated_filename(filename: str, output_type: str):
                            OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
 
 
-def per_model_weight_count_estimation(tensors: Iterator[tuple[str, Tensor]], expert_count: int) -> int:
-    # TODO: Ensure parameter count is accurate throughout various model type
-    #       May currently overestimate parameter count in Mamba model because
-    #       output weights is tied with token embeddings.
-    sum_weight_estimate = 0
-    for name, data_torch in tensors:
-        # Got A Tensor
-
-        # We don't need these
-        if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
-            continue
-
-        # Calculate Tensor Volume
-        sum_weights_in_tensor = 1
-        for dim in data_torch.shape:
-            sum_weights_in_tensor *= dim
-
-        # Add Tensor Volume To Running Count
-        sum_weight_estimate += sum_weights_in_tensor
-
-    # Calculate weight estimate per model
-    per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
-
-    return per_model_weight_estimate
-
-
 def model_weight_count_rounded_notation(model_params_count: int) -> str:
     if model_params_count > 1e15 :
         # Quadrillion Of Parameters
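The hunk above is cut off just after the quadrillion branch of model_weight_count_rounded_notation. For orientation, a plausible sketch of what such a rounding helper looks like, assuming the remaining branches follow the same threshold pattern (Q, T, B, M, K suffixes); this is an illustration, not necessarily the exact upstream body:

    def model_weight_count_rounded_notation(model_params_count: int) -> str:
        if model_params_count > 1e15:
            # Quadrillion Of Parameters
            scaled, suffix = model_params_count * 1e-15, "Q"
        elif model_params_count > 1e12:
            # Trillion Of Parameters
            scaled, suffix = model_params_count * 1e-12, "T"
        elif model_params_count > 1e9:
            # Billion Of Parameters
            scaled, suffix = model_params_count * 1e-9, "B"
        elif model_params_count > 1e6:
            # Million Of Parameters
            scaled, suffix = model_params_count * 1e-6, "M"
        else:
            # Thousand Of Parameters
            scaled, suffix = model_params_count * 1e-3, "K"
        return f"{round(scaled)}{suffix}"

    assert model_weight_count_rounded_notation(7_241_732_096) == "7B"

This rounded notation is what feeds the parameter size class in the default GGUF filename (e.g. a 7B F16 model yielding a "...-7B-F16.gguf" style name), which is why the weight estimation now lives next to the naming logic in the conversion scripts.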