convert-*.py: refactor parameter weight class

brian khuu 2024-06-04 01:14:50 +10:00
parent 32e80e094c
commit 54918ad14e
3 changed files with 11 additions and 14 deletions


@@ -128,16 +128,17 @@ class Model:
         if self.metadata.name is None:
             self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch]
 
+        # Generate parameter weight class (useful for leader boards) if not yet determined
+        if self.metadata.parameter_weight_class is None:
+            expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
+            weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count)
+            self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate)
+
         # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
         output_type = self.ftype.name.partition("_")[2]
 
-        # Update authorship metadata class with parameter size class (useful for leader boards)
-        expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
-        weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count)
-        self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate)
-
         # Generate default filename based on model specification and available metadata
-        self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)
+        self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.parameter_weight_class, output_type)
 
         # Filename Output
         if fname_out is not None:
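To make the refactor concrete, here is a minimal runnable sketch of the new flow: the parameter weight class is computed once while metadata is prepared, and only when not already determined, so an existing value (for example from a metadata override) survives; filename generation then reuses the cached string instead of recomputing it. Metadata, the toy parameter_weight_class helper, and the hard-coded weight estimate below are simplified stand-ins, not the actual convert script code.

from dataclasses import dataclass

@dataclass
class Metadata:
    name: str | None = None
    parameter_weight_class: str | None = None

def parameter_weight_class(expert_count: int | None, model_params_count: int) -> str:
    # Toy stand-in: "<experts>x<size>B" for MoE models, "<size>B" otherwise.
    per_expert = model_params_count // (expert_count or 1)
    label = f"{round(per_expert / 1e9)}B"
    return f"{expert_count}x{label}" if expert_count else label

hparams = {"num_local_experts": 8}
metadata = Metadata(name="Mixtral")

# Computed once, only if not yet determined, then cached on the metadata object.
if metadata.parameter_weight_class is None:
    expert_count = hparams.get("num_local_experts")
    weight_estimate = 56_000_000_000  # the real script sums this from the tensors
    metadata.parameter_weight_class = parameter_weight_class(expert_count, weight_estimate)

print(metadata.parameter_weight_class)  # -> "8x7B"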


@@ -1206,6 +1206,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_
     basename = metadata.basename if metadata.basename is not None else None
     finetune = metadata.finetune if metadata.finetune is not None else None
     version = metadata.version if metadata.version is not None else None
+    parameter_weight_class = metadata.parameter_weight_class if metadata.parameter_weight_class is not None else gguf.parameter_weight_class(expert_count, model_params_count)
 
     output_type = {
         GGMLFileType.AllF32: "F32",
@@ -1213,7 +1214,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_
         GGMLFileType.MostlyQ8_0: "Q8_0",
     }[file_type]
 
-    return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type)
+    return gguf.naming_convention(name, basename, finetune, version, parameter_weight_class, output_type)
 
 
 def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path:
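The added line follows a prefer-then-fallback pattern: the legacy converter keeps any parameter weight class already recorded on the metadata and only estimates one from the model's counts when none is present. A self-contained sketch of that pattern, with hypothetical helper names (resolve_weight_class and estimate_weight_class are illustrations, not names from the codebase):

def estimate_weight_class(expert_count: int | None, model_params_count: int) -> str:
    # Toy stand-in for gguf.parameter_weight_class.
    per_expert = model_params_count // (expert_count or 1)
    label = f"{round(per_expert / 1e9)}B"
    return f"{expert_count}x{label}" if expert_count else label

def resolve_weight_class(metadata_value: str | None,
                         expert_count: int | None,
                         model_params_count: int) -> str:
    # Prefer the class already recorded on the metadata; estimate otherwise.
    if metadata_value is not None:
        return metadata_value
    return estimate_weight_class(expert_count, model_params_count)

print(resolve_weight_class(None, None, 7_000_000_000))  # -> "7B" (estimated)
print(resolve_weight_class("8x7B", 8, 56_000_000_000))  # -> "8x7B" (kept)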


@@ -44,8 +44,7 @@ def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str
     return size_class
 
 
-def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str:
+def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, parameter_weight_class: str, output_type: str) -> str:
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
 
     if base_name is not None:
@@ -55,11 +54,7 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
     else:
         name = "ggml-model"
 
-    per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count)
-    if expert_count_int is not None and expert_count_int > 0:
-        parameters = f"-{expert_count_int}x{per_model_rounded_weight_estimate}"
-    else:
-        parameters = f"-{per_model_rounded_weight_estimate}"
+    parameters = f"-{parameter_weight_class}" if parameter_weight_class is not None else ""
 
     finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else ""