convert-*.py: refactor parameter weight class

brian khuu 2024-06-04 01:14:50 +10:00
parent 32e80e094c
commit 54918ad14e
3 changed files with 11 additions and 14 deletions


@@ -128,16 +128,17 @@ class Model:
         if self.metadata.name is None:
             self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch]
 
+        # Generate parameter weight class (useful for leader boards) if not yet determined
+        if self.metadata.parameter_weight_class is None:
+            expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
+            weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count)
+            self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate)
+
         # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
         output_type = self.ftype.name.partition("_")[2]
 
-        # Update authorship metadata class with parameter size class (useful for leader boards)
-        expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
-        weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count)
-        self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate)
-
         # Generate default filename based on model specification and available metadata
-        self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)
+        self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.parameter_weight_class, output_type)
 
         # Filename Output
         if fname_out is not None:
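To make the refactor concrete, here is a minimal runnable sketch of the new flow: the parameter weight class is computed once while metadata is prepared, and only when not already determined, so an existing value (for example from a metadata override) survives; filename generation then reuses the cached string instead of recomputing it. Metadata, the toy parameter_weight_class helper, and the hard-coded weight estimate below are simplified stand-ins, not the actual convert script code.

from dataclasses import dataclass

@dataclass
class Metadata:
    name: str | None = None
    parameter_weight_class: str | None = None

def parameter_weight_class(expert_count: int | None, model_params_count: int) -> str:
    # Toy stand-in: "<experts>x<size>B" for MoE models, "<size>B" otherwise.
    per_expert = model_params_count // (expert_count or 1)
    label = f"{round(per_expert / 1e9)}B"
    return f"{expert_count}x{label}" if expert_count else label

hparams = {"num_local_experts": 8}
metadata = Metadata(name="Mixtral")

# Computed once, only if not yet determined, then cached on the metadata object.
if metadata.parameter_weight_class is None:
    expert_count = hparams.get("num_local_experts")
    weight_estimate = 56_000_000_000  # the real script sums this from the tensors
    metadata.parameter_weight_class = parameter_weight_class(expert_count, weight_estimate)

print(metadata.parameter_weight_class)  # -> "8x7B"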


@@ -1206,6 +1206,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_
     basename = metadata.basename if metadata.basename is not None else None
     finetune = metadata.finetune if metadata.finetune is not None else None
     version = metadata.version if metadata.version is not None else None
+    parameter_weight_class = metadata.parameter_weight_class if metadata.parameter_weight_class is not None else gguf.parameter_weight_class(expert_count, model_params_count)
 
     output_type = {
         GGMLFileType.AllF32: "F32",
@@ -1213,7 +1214,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_
         GGMLFileType.MostlyQ8_0: "Q8_0",
     }[file_type]
 
-    return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type)
+    return gguf.naming_convention(name, basename, finetune, version, parameter_weight_class, output_type)
 
 
 def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path:
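The added line follows a prefer-then-fallback pattern: the legacy converter keeps any parameter weight class already recorded on the metadata and only estimates one from the model's counts when none is present. A self-contained sketch of that pattern, with hypothetical helper names (resolve_weight_class and estimate_weight_class are illustrations, not names from the codebase):

def estimate_weight_class(expert_count: int | None, model_params_count: int) -> str:
    # Toy stand-in for gguf.parameter_weight_class.
    per_expert = model_params_count // (expert_count or 1)
    label = f"{round(per_expert / 1e9)}B"
    return f"{expert_count}x{label}" if expert_count else label

def resolve_weight_class(metadata_value: str | None,
                         expert_count: int | None,
                         model_params_count: int) -> str:
    # Prefer the class already recorded on the metadata; estimate otherwise.
    if metadata_value is not None:
        return metadata_value
    return estimate_weight_class(expert_count, model_params_count)

print(resolve_weight_class(None, None, 7_000_000_000))  # -> "7B" (estimated)
print(resolve_weight_class("8x7B", 8, 56_000_000_000))  # -> "8x7B" (kept)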


@@ -44,8 +44,7 @@ def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str
     return size_class
 
 
-def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str:
+def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, parameter_weight_class: str, output_type: str) -> str:
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
 
     if base_name is not None:
@@ -55,11 +54,7 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
     else:
         name = "ggml-model"
 
-    per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count)
-    if expert_count_int is not None and expert_count_int > 0:
-        parameters = f"-{expert_count_int}x{per_model_rounded_weight_estimate}"
-    else:
-        parameters = f"-{per_model_rounded_weight_estimate}"
+    parameters = f"-{parameter_weight_class}" if parameter_weight_class is not None else ""
 
     finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else ""