diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7b1697ff3..348e85535 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -128,16 +128,17 @@ class Model: if self.metadata.name is None: self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch] + # Generate parameter weight class (useful for leader boards) if not yet determined + if self.metadata.parameter_weight_class is None: + expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None + weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count) + self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate) + # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32' output_type = self.ftype.name.partition("_")[2] - # Update authorship metadata class with parameter size class (useful for leader boards) - expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None - weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count) - self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate) - # Generate default filename based on model specification and available metadata - self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type) + self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.parameter_weight_class, output_type) # Filename Output if fname_out is not None: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 30cc66d59..0a26a5503 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1206,6 +1206,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_ basename = metadata.basename if metadata.basename is not None else None finetune = metadata.finetune if metadata.finetune is not None else None version = metadata.version if metadata.version is not None else None + parameter_weight_class = metadata.parameter_weight_class if metadata.parameter_weight_class is not None else gguf.parameter_weight_class(expert_count, model_params_count) output_type = { GGMLFileType.AllF32: "F32", @@ -1213,7 +1214,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_ GGMLFileType.MostlyQ8_0: "Q8_0", }[file_type] - return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type) + return gguf.naming_convention(name, basename, finetune, version, parameter_weight_class, output_type) def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path: diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 6b50f7e45..5ddfd8cb9 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -44,8 +44,7 @@ def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str return size_class - -def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str: +def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, parameter_weight_class: str, output_type: str) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: @@ -55,11 +54,7 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers else: name = "ggml-model" - per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) - if expert_count_int is not None and expert_count_int > 0: - parameters = f"-{expert_count_int}x{per_model_rounded_weight_estimate}" - else: - parameters = f"-{per_model_rounded_weight_estimate}" + parameters = f"-{parameter_weight_class}" if parameter_weight_class is not None else "" finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else ""