convert-*.py: move per model weight estimation away from util back to main script
plus some refactoring
parent 684c604eca
commit b1927eed82

3 changed files with 90 additions and 109 deletions
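For orientation, the function this commit relocates is a naive parameter-count estimate: sum the volume of every weight tensor, then divide by the expert count for mixture-of-experts models. Below is a standalone sketch adapted from the diff that follows; the numpy stand-in for torch, the toy tensor names, the None-guard on expert_count, and the floor division (the diff uses true division) are illustrative assumptions, not part of the commit.

from __future__ import annotations

from typing import Iterable

import numpy as np  # stand-in for torch; anything whose tensors expose .shape works


def per_model_weight_count_estimation(tensors: Iterable[tuple[str, np.ndarray]], expert_count: int | None) -> int:
    sum_weight_estimate = 0
    for name, data in tensors:
        # Skip buffers that are not real parameters (same skip-list as the diff)
        if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
            continue
        # Tensor volume = product of its dimensions
        sum_weights_in_tensor = 1
        for dim in data.shape:
            sum_weights_in_tensor *= dim
        sum_weight_estimate += sum_weights_in_tensor
    # For mixture-of-experts models, report the per-expert share
    # (None-guard is an assumption; the diff only checks expert_count > 0)
    if expert_count is not None and expert_count > 0:
        return sum_weight_estimate // expert_count
    return sum_weight_estimate


# Toy check: a 3x4 weight plus a 4-element bias -> 16 parameters
demo = [("blk.0.ffn_up.weight", np.zeros((3, 4))), ("blk.0.ffn_up.bias", np.zeros(4))]
print(per_model_weight_count_estimation(demo, None))  # 16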
@@ -47,7 +47,6 @@ AnyModel = TypeVar("AnyModel", bound="type[Model]")
 class Model:
     _model_classes: dict[str, type[Model]] = {}
 
-    model_name: str
     dir_model: Path
     ftype: gguf.LlamaFileType
     is_big_endian: bool
@@ -72,6 +71,10 @@ class Model:
                  model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
+
+        if metadata is None:
+            raise TypeError("authorship metadata must be provided")
+
         self.dir_model = dir_model
         self.ftype = ftype
         self.is_big_endian = is_big_endian
@@ -121,16 +124,20 @@ class Model:
 
         self.model_name = Model.get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch)
 
+        # Fallback to model architecture name if metadata name is still missing
+        if self.metadata.name is None:
+            self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch]
+
         # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
         output_type = self.ftype.name.partition("_")[2]
 
         # Update authorship metadata class with parameter size class (useful for leader boards)
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
-        weight_estimate = gguf.per_model_weight_count_estimation(self.get_tensors(), expert_count)
+        weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count)
         self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate)
 
         # Generate default filename based on model specification and available metadata
-        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)
+        self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)
 
         # Filename Output
         if fname_out is not None:
@@ -229,37 +236,36 @@ class Model:
         return new_name
 
     def set_gguf_meta_model(self):
-        self.gguf_writer.add_name(self.model_name)
+        self.gguf_writer.add_name(self.metadata.name)
 
-        if self.metadata is not None:
-            if self.metadata.basename is not None:
-                self.gguf_writer.add_basename(self.metadata.basename)
-            if self.metadata.finetune is not None:
-                self.gguf_writer.add_finetune(self.metadata.finetune)
-            if self.metadata.author is not None:
-                self.gguf_writer.add_author(self.metadata.author)
-            if self.metadata.version is not None:
-                self.gguf_writer.add_version(self.metadata.version)
-            if self.metadata.base_version is not None:
-                self.gguf_writer.add_base_version(self.metadata.base_version)
-            if self.metadata.url is not None:
-                self.gguf_writer.add_url(self.metadata.url)
-            if self.metadata.description is not None:
-                self.gguf_writer.add_description(self.metadata.description)
-            if self.metadata.license is not None:
-                self.gguf_writer.add_license(self.metadata.license)
-            if self.metadata.source_url is not None:
-                self.gguf_writer.add_source_url(self.metadata.source_url)
-            if self.metadata.source_hf_repo is not None:
-                self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo)
-            if self.metadata.parameter_size_class is not None:
-                self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class)
-            if self.metadata.tags is not None:
-                self.gguf_writer.add_tags(self.metadata.tags)
-            if self.metadata.languages is not None:
-                self.gguf_writer.add_languages(self.metadata.languages)
-            if self.metadata.datasets is not None:
-                self.gguf_writer.add_datasets(self.metadata.datasets)
+        if self.metadata.basename is not None:
+            self.gguf_writer.add_basename(self.metadata.basename)
+        if self.metadata.finetune is not None:
+            self.gguf_writer.add_finetune(self.metadata.finetune)
+        if self.metadata.author is not None:
+            self.gguf_writer.add_author(self.metadata.author)
+        if self.metadata.version is not None:
+            self.gguf_writer.add_version(self.metadata.version)
+        if self.metadata.base_version is not None:
+            self.gguf_writer.add_base_version(self.metadata.base_version)
+        if self.metadata.url is not None:
+            self.gguf_writer.add_url(self.metadata.url)
+        if self.metadata.description is not None:
+            self.gguf_writer.add_description(self.metadata.description)
+        if self.metadata.license is not None:
+            self.gguf_writer.add_license(self.metadata.license)
+        if self.metadata.source_url is not None:
+            self.gguf_writer.add_source_url(self.metadata.source_url)
+        if self.metadata.source_hf_repo is not None:
+            self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo)
+        if self.metadata.parameter_size_class is not None:
+            self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class)
+        if self.metadata.tags is not None:
+            self.gguf_writer.add_tags(self.metadata.tags)
+        if self.metadata.languages is not None:
+            self.gguf_writer.add_languages(self.metadata.languages)
+        if self.metadata.datasets is not None:
+            self.gguf_writer.add_datasets(self.metadata.datasets)
 
     def set_gguf_parameters(self):
         self.gguf_writer.add_block_count(self.block_count)
@@ -415,23 +421,30 @@ class Model:
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.close()
 
-    # Set model name based on latest metadata either provided or calculated from environment
-    @staticmethod
-    def get_model_name(metadata, huggingface_parameters, dir_model, model_arch):
-        if metadata is not None and metadata.name is not None:
-            # Explicit Metadata Was Provided By User
-            return metadata.name
-        elif huggingface_parameters is not None and "_name_or_path" in huggingface_parameters:
-            # Hugging Face Parameters Model Name or Model Folder Name is Provided
-            return huggingface_parameters["_name_or_path"]
-        elif huggingface_parameters is not None and "model_type" in huggingface_parameters:
-            # Hugging Face Parameters Model Type is Provided
-            return huggingface_parameters["model_type"]
-        elif dir_model is not None and dir_model.name is not None:
-            # Use directory folder name
-            return dir_model.name
-        else:
-            return gguf.MODEL_ARCH_NAMES[model_arch]
+    def per_model_weight_count_estimation(tensors: Iterator[tuple[str, Tensor]], expert_count: int) -> int:
+        # TODO: Ensure parameter count is accurate throughout various model type
+        # May currently overestimate parameter count in Mamba model because
+        # output weights is tied with token embeddings.
+        sum_weight_estimate = 0
+        for name, data_torch in tensors:
+            # Got A Tensor
+
+            # We don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+                continue
+
+            # Calculate Tensor Volume
+            sum_weights_in_tensor = 1
+            for dim in data_torch.shape:
+                sum_weights_in_tensor *= dim
+
+            # Add Tensor Volume To Running Count
+            sum_weight_estimate += sum_weights_in_tensor
+
+        # Calculate weight estimate per model
+        per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
+
+        return per_model_weight_estimate
 
     @staticmethod
     def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]:
@@ -773,7 +773,7 @@ class OutputFile:
     def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None:
         # Metadata About The Model And Its Provenence
         name = "LLaMA"
-        if metadata is not None and metadata.name is not None:
+        if metadata.name is not None:
             name = metadata.name
         elif params.path_model is not None:
             name = params.path_model.name
@@ -783,29 +783,28 @@ class OutputFile:
 
         self.gguf.add_name(name)
 
-        if metadata is not None:
-            if metadata.basename is not None:
-                self.gguf.add_basename(metadata.basename)
-            if metadata.finetune is not None:
-                self.gguf.add_finetune(metadata.finetune)
-            if metadata.author is not None:
-                self.gguf.add_author(metadata.author)
-            if metadata.version is not None:
-                self.gguf.add_version(metadata.version)
-            if metadata.base_version is not None:
-                self.gguf.add_base_version(metadata.base_version)
-            if metadata.url is not None:
-                self.gguf.add_url(metadata.url)
-            if metadata.description is not None:
-                self.gguf.add_description(metadata.description)
-            if metadata.license is not None:
-                self.gguf.add_license(metadata.license)
-            if metadata.source_url is not None:
-                self.gguf.add_source_url(metadata.source_url)
-            if metadata.source_hf_repo is not None:
-                self.gguf.add_source_hf_repo(metadata.source_hf_repo)
-            if metadata.tags is not None:
-                self.gguf_writer.add_tags(metadata.tags)
+        if metadata.basename is not None:
+            self.gguf.add_basename(metadata.basename)
+        if metadata.finetune is not None:
+            self.gguf.add_finetune(metadata.finetune)
+        if metadata.author is not None:
+            self.gguf.add_author(metadata.author)
+        if metadata.version is not None:
+            self.gguf.add_version(metadata.version)
+        if metadata.base_version is not None:
+            self.gguf.add_base_version(metadata.base_version)
+        if metadata.url is not None:
+            self.gguf.add_url(metadata.url)
+        if metadata.description is not None:
+            self.gguf.add_description(metadata.description)
+        if metadata.license is not None:
+            self.gguf.add_license(metadata.license)
+        if metadata.source_url is not None:
+            self.gguf.add_source_url(metadata.source_url)
+        if metadata.source_hf_repo is not None:
+            self.gguf.add_source_hf_repo(metadata.source_hf_repo)
+        if metadata.tags is not None:
+            self.gguf_writer.add_tags(metadata.tags)
 
     def add_meta_arch(self, params: Params) -> None:
         # Metadata About The Neural Architecture Itself
@@ -1197,10 +1196,10 @@ class VocabFactory:
 
 
 def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str:
-    name = metadata.name if metadata is not None and metadata.name is not None else model_name
-    basename = metadata.basename if metadata is not None and metadata.basename is not None else None
-    finetune = metadata.finetune if metadata is not None and metadata.finetune is not None else None
-    version = metadata.version if metadata is not None and metadata.version is not None else None
+    name = metadata.name if metadata.name is not None else model_name
+    basename = metadata.basename if metadata.basename is not None else None
+    finetune = metadata.finetune if metadata.finetune is not None else None
+    version = metadata.version if metadata.version is not None else None
 
     output_type = {
         GGMLFileType.AllF32: "F32",
@@ -1,10 +1,5 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Iterator
-
-if TYPE_CHECKING:
-    from torch import Tensor
-
 
 def fill_templated_filename(filename: str, output_type: str):
     # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
@@ -15,32 +10,6 @@ def fill_templated_filename(filename: str, output_type: str):
                            OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
 
 
-def per_model_weight_count_estimation(tensors: Iterator[tuple[str, Tensor]], expert_count: int) -> int:
-    # TODO: Ensure parameter count is accurate throughout various model type
-    # May currently overestimate parameter count in Mamba model because
-    # output weights is tied with token embeddings.
-    sum_weight_estimate = 0
-    for name, data_torch in tensors:
-        # Got A Tensor
-
-        # We don't need these
-        if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
-            continue
-
-        # Calculate Tensor Volume
-        sum_weights_in_tensor = 1
-        for dim in data_torch.shape:
-            sum_weights_in_tensor *= dim
-
-        # Add Tensor Volume To Running Count
-        sum_weight_estimate += sum_weights_in_tensor
-
-    # Calculate weight estimate per model
-    per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
-
-    return per_model_weight_estimate
-
-
 def model_weight_count_rounded_notation(model_params_count: int) -> str:
     if model_params_count > 1e15 :
         # Quadrillion Of Parameters
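The page truncates the diff inside model_weight_count_rounded_notation. For context only, here is a hedged sketch of what such a rounding helper plausibly does; every branch except the visible "> 1e15 / Quadrillion Of Parameters" one is an assumption, not the repository's code.

def model_weight_count_rounded_notation(model_params_count: int) -> str:
    # Sketch: map a raw parameter count to a compact size label ("7B", "13B", ...)
    if model_params_count > 1e15:
        # Quadrillion Of Parameters (the only branch visible in the diff above)
        scaled, suffix = model_params_count * 1e-15, "Q"
    elif model_params_count > 1e12:
        scaled, suffix = model_params_count * 1e-12, "T"  # assumed
    elif model_params_count > 1e9:
        scaled, suffix = model_params_count * 1e-9, "B"   # assumed
    elif model_params_count > 1e6:
        scaled, suffix = model_params_count * 1e-6, "M"   # assumed
    else:
        scaled, suffix = model_params_count * 1e-3, "K"   # assumed
    return f"{round(scaled)}{suffix}"


print(model_weight_count_rounded_notation(7_241_000_000))  # "7B"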