convert-*.py: add parameter size class
commit 0f1d50fab7 (parent 8f734083dd)
7 changed files with 42 additions and 23 deletions
@@ -10,7 +10,6 @@ import json
 import os
 import re
 import sys
-import frontmatter
 from enum import IntEnum
 from pathlib import Path
 from hashlib import sha256
@@ -90,11 +89,9 @@ class Model:
         self.tensor_names = None
         self.metadata = metadata

-        model_tensors = self.get_tensors()
-
         if self.ftype == gguf.LlamaFileType.GUESSED:
             # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
-            _, first_tensor = next(model_tensors)
+            _, first_tensor = next(self.get_tensors())
             if first_tensor.dtype == torch.float16:
                 logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
                 self.ftype = gguf.LlamaFileType.MOSTLY_F16
@@ -127,10 +124,10 @@ class Model:
         # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
         output_type = self.ftype.name.partition("_")[2]

-        # Get Expert Count From huggingface_parameters
+        # Update authorship metadata class with parameter size class (useful for leader boards)
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
-        weight_estimate = gguf.per_model_weight_count_estimation(model_tensors, expert_count)
+        weight_estimate = gguf.per_model_weight_count_estimation(self.get_tensors(), expert_count)
+        self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate)

         # Generate default filename based on model specification and available metadata
         self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)
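
Note: gguf.per_model_weight_count_estimation is not shown in this diff; only its name and arguments appear above. The following is a hedged sketch of the presumed behaviour, assuming it sums tensor element counts and normalizes by expert count for MoE checkpoints; the body here is a guess, not the commit's implementation.

import numpy as np

# Sketch only (assumption): sum the element counts of all (name, tensor)
# pairs, then divide by expert_count so MoE models report a per-expert-model
# weight estimate rather than the total.
def per_model_weight_count_estimation_sketch(tensors, expert_count):
    total = 0
    for _name, data in tensors:  # self.get_tensors() yields such pairs
        total += int(np.prod(data.shape))
    if expert_count is not None and expert_count > 0:
        return total // expert_count
    return total
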
@@ -255,6 +252,8 @@ class Model:
             self.gguf_writer.add_source_url(self.metadata.source_url)
         if self.metadata.source_hf_repo is not None:
             self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo)
+        if self.metadata.parameter_size_class is not None:
+            self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class)
         if self.metadata.tags is not None:
             self.gguf_writer.add_tags(self.metadata.tags)

@@ -1363,6 +1363,8 @@ def main(args_in: list[str] | None = None) -> None:
     model = convert_to_output_type(model, ftype)
     outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata)

+    metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count)
+
     params.ftype = ftype
     logger.info(f"Writing {outfile}, format {ftype}")

@@ -37,6 +37,7 @@ class Keys:
         SOURCE_URL           = "general.source.url"
         SOURCE_HF_REPO       = "general.source.huggingface.repository"
         FILE_TYPE            = "general.file_type"
+        PARAMETER_SIZE_CLASS = "general.parameter_size_class"
         TAGS                 = "general.tags"

     class LLM:
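
The new general.parameter_size_class key can be checked on a converted file. A hedged sketch, assuming gguf-py's GGUFReader; the string-decoding detail is an assumption and may differ across gguf versions.

from gguf import GGUFReader

reader = GGUFReader("model.gguf")  # hypothetical converter output path
field = reader.fields.get("general.parameter_size_class")
if field is not None:
    # assumption: the last part of a string field holds its raw UTF-8 bytes
    print(bytes(field.parts[-1]).decode("utf-8"))  # e.g. "8x7B"
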
@@ -472,6 +472,9 @@ class GGUFWriter:
     def add_file_type(self, ftype: int) -> None:
         self.add_uint32(Keys.General.FILE_TYPE, ftype)

+    def add_parameter_size_class(self, parameter_size_class: str) -> None:
+        self.add_string(Keys.General.PARAMETER_SIZE_CLASS, parameter_size_class)
+
     def add_tags(self, tags: Sequence[str]) -> None:
         self.add_array(Keys.Tokenizer.TAGS, tags)

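
A minimal usage sketch for the new writer method; the output path and the "llama" architecture string are placeholders, not part of this commit.

import gguf

writer = gguf.GGUFWriter("example.gguf", "llama")  # placeholder path/arch
writer.add_parameter_size_class("8x7B")            # method added by this commit
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
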
@@ -26,7 +26,8 @@ class Metadata:
     license_link: Optional[str] = None
     source_url: Optional[str] = None
     source_hf_repo: Optional[str] = None
-    tags: Optional[List[str]] = None
+    parameter_size_class: Optional[str] = None
+    tags: Optional[list[str]] = None

     @staticmethod
     def load(metadata_override_path: Path, model_path: Path) -> Metadata:
@@ -89,6 +90,7 @@ class Metadata:
         metadata.license_link         = metadata_override.get(Keys.General.LICENSE_LINK        , metadata.license_link        ) # noqa: E202
         metadata.source_url           = metadata_override.get(Keys.General.SOURCE_URL          , metadata.source_url          ) # noqa: E202
         metadata.source_hf_repo       = metadata_override.get(Keys.General.SOURCE_HF_REPO      , metadata.source_hf_repo      ) # noqa: E202
+        metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class) # noqa: E202
         metadata.tags                 = metadata_override.get(Keys.General.TAGS                , metadata.tags                ) # noqa: E202

         return metadata
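
The override lines above all follow the same dict.get(key, current) pattern: a field keeps its derived value unless the override mapping supplies one. A self-contained illustration with a placeholder value:

# placeholder override mapping, keyed by the general.* constant strings
metadata_override = {"general.parameter_size_class": "8x7B"}

parameter_size_class = None  # value derived (or not) before the override
parameter_size_class = metadata_override.get("general.parameter_size_class",
                                             parameter_size_class)
print(parameter_size_class)  # -> "8x7B"
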
@@ -65,6 +65,17 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str:
     return f"{round(scaled_model_params)}{scale_suffix}"


+def parameter_size_class(expert_count_int:int, model_params_count: int) -> str:
+    per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count)
+
+    if expert_count_int is not None and expert_count_int > 0:
+        size_class = f"{expert_count_int}x{per_model_rounded_weight_estimate}"
+    else:
+        size_class = f"{per_model_rounded_weight_estimate}"
+
+    return size_class
+
+
 def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str:
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention

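
Worked examples for the new helper, with illustrative parameter counts that are not from this commit, assuming model_weight_count_rounded_notation rounds to a K/M/B/T suffix as its return statement above suggests:

parameter_size_class(8, 7_241_732_096)     # -> "8x7B"  (Mixtral-style MoE)
parameter_size_class(None, 6_738_415_616)  # -> "7B"    (dense model)
parameter_size_class(0, 124_439_808)       # -> "124M"  (small dense model)
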
@@ -3,3 +3,4 @@ sentencepiece~=0.2.0
 transformers>=4.40.1,<5.0.0
 gguf>=0.1.0
 protobuf>=4.21.0,<5.0.0
+python-frontmatter~=1.0.1