convert-*.py: add parameter size class

brian khuu 2024-06-02 15:40:31 +10:00
parent 8f734083dd
commit 0f1d50fab7
7 changed files with 42 additions and 23 deletions

View file

@@ -10,7 +10,6 @@ import json
 import os
 import re
 import sys
-import frontmatter
 from enum import IntEnum
 from pathlib import Path
 from hashlib import sha256
@@ -90,11 +89,9 @@ class Model:
         self.tensor_names = None
         self.metadata = metadata
-        model_tensors = self.get_tensors()
 
         if self.ftype == gguf.LlamaFileType.GUESSED:
             # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
-            _, first_tensor = next(model_tensors)
+            _, first_tensor = next(self.get_tensors())
             if first_tensor.dtype == torch.float16:
                 logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
                 self.ftype = gguf.LlamaFileType.MOSTLY_F16
@@ -127,10 +124,10 @@ class Model:
         # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
         output_type = self.ftype.name.partition("_")[2]
 
-        # Get Expert Count From huggingface_parameters
+        # Update authorship metadata class with parameter size class (useful for leader boards)
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
-        weight_estimate = gguf.per_model_weight_count_estimation(model_tensors, expert_count)
+        weight_estimate = gguf.per_model_weight_count_estimation(self.get_tensors(), expert_count)
+        self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate)
 
         # Generate default filename based on model specification and available metadata
         self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)
@@ -255,6 +252,8 @@ class Model:
             self.gguf_writer.add_source_url(self.metadata.source_url)
         if self.metadata.source_hf_repo is not None:
             self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo)
+        if self.metadata.parameter_size_class is not None:
+            self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class)
         if self.metadata.tags is not None:
             self.gguf_writer.add_tags(self.metadata.tags)
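
Note: the size class leans on gguf.per_model_weight_count_estimation(), which this diff calls but does not define. As a rough sketch only (not the helper's actual code), such an estimate plausibly sums tensor element counts and reports a per-expert figure for mixture-of-experts models, so that a class like "8x7B" reads as eight experts of roughly 7B parameters each:

    # Illustrative sketch only -- not code from this commit. Assumes `tensors`
    # yields (name, tensor) pairs where each tensor exposes a .shape tuple.
    def estimate_per_model_weight_count(tensors, expert_count):
        total = 0
        for _name, tensor in tensors:
            count = 1
            for dim in tensor.shape:  # element count is the product of the dimensions
                count *= dim
            total += count
        if expert_count is not None and expert_count > 0:
            return total // expert_count  # per-expert size for MoE models
        return total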

View file

@@ -1363,6 +1363,8 @@ def main(args_in: list[str] | None = None) -> None:
         model = convert_to_output_type(model, ftype)
         outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata)
 
+        metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count)
+
         params.ftype = ftype
 
         logger.info(f"Writing {outfile}, format {ftype}")

View file

@@ -37,6 +37,7 @@ class Keys:
         SOURCE_URL           = "general.source.url"
         SOURCE_HF_REPO       = "general.source.huggingface.repository"
         FILE_TYPE            = "general.file_type"
+        PARAMETER_SIZE_CLASS = "general.parameter_size_class"
         TAGS                 = "general.tags"
 
     class LLM:
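
Tools that consume the new key (e.g. for leaderboards) can read it back with gguf-py's GGUFReader. A minimal sketch; the byte-decoding step assumes the simple layout gguf-py uses for plain string fields:

    from gguf.gguf_reader import GGUFReader

    reader = GGUFReader("model.gguf")  # path is illustrative
    field = reader.get_field("general.parameter_size_class")
    if field is not None:
        # For a plain string field, the final part holds the UTF-8 payload.
        print(bytes(field.parts[-1]).decode("utf-8"))  # e.g. "8x7B"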

View file

@@ -472,6 +472,9 @@ class GGUFWriter:
     def add_file_type(self, ftype: int) -> None:
         self.add_uint32(Keys.General.FILE_TYPE, ftype)
 
+    def add_parameter_size_class(self, parameter_size_class: str) -> None:
+        self.add_string(Keys.General.PARAMETER_SIZE_CLASS, parameter_size_class)
+
     def add_tags(self, tags: Sequence[str]) -> None:
         self.add_array(Keys.Tokenizer.TAGS, tags)
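
The new setter is used like the other general.* setters on GGUFWriter. A minimal sketch (file name and value are illustrative; the write calls follow gguf-py's usual writer flow):

    import gguf

    writer = gguf.GGUFWriter("example.gguf", arch="llama")
    writer.add_parameter_size_class("8x7B")  # key introduced by this commit
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()
    writer.close()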

View file

@@ -26,7 +26,8 @@ class Metadata:
     license_link: Optional[str] = None
     source_url: Optional[str] = None
     source_hf_repo: Optional[str] = None
-    tags: Optional[List[str]] = None
+    parameter_size_class: Optional[str] = None
+    tags: Optional[list[str]] = None
 
     @staticmethod
     def load(metadata_override_path: Path, model_path: Path) -> Metadata:
@@ -88,7 +89,8 @@ class Metadata:
             metadata.license_name         = metadata_override.get(Keys.General.LICENSE_NAME        , metadata.license_name        )  # noqa: E202
             metadata.license_link         = metadata_override.get(Keys.General.LICENSE_LINK        , metadata.license_link        )  # noqa: E202
             metadata.source_url           = metadata_override.get(Keys.General.SOURCE_URL          , metadata.source_url          )  # noqa: E202
-            metadata.source_hf_repo       = metadata_override.get(Keys.General.SOURCE_HF_REPO, metadata.source_hf_repo)  # noqa: E202
+            metadata.source_hf_repo       = metadata_override.get(Keys.General.SOURCE_HF_REPO      , metadata.source_hf_repo      )  # noqa: E202
+            metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class)  # noqa: E202
             metadata.tags                 = metadata_override.get(Keys.General.TAGS                , metadata.tags                )  # noqa: E202
 
         return metadata
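
Since the override path keys off the same GGUF key name, the size class can also be forced without touching the model. A sketch, assuming the override file is the JSON mapping of key names to values that metadata_override.get() implies:

    from pathlib import Path
    from gguf.metadata import Metadata

    # override.json (hypothetical contents): {"general.parameter_size_class": "8x7B"}
    metadata = Metadata.load(Path("override.json"), Path("./my-model"))
    print(metadata.parameter_size_class)  # -> "8x7B"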

View file

@@ -65,6 +65,17 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str:
     return f"{round(scaled_model_params)}{scale_suffix}"
 
+
+def parameter_size_class(expert_count_int:int, model_params_count: int) -> str:
+    per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count)
+
+    if expert_count_int is not None and expert_count_int > 0:
+        size_class = f"{expert_count_int}x{per_model_rounded_weight_estimate}"
+    else:
+        size_class = f"{per_model_rounded_weight_estimate}"
+
+    return size_class
+
+
 def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str:
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
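
Worked examples for the new helper, assuming model_weight_count_rounded_notation() renders counts the way its name suggests (e.g. 7_000_000_000 -> "7B"):

    parameter_size_class(None, 7_000_000_000)  # -> "7B"   (dense model)
    parameter_size_class(8, 7_000_000_000)     # -> "8x7B" (8 experts, ~7B parameters each)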

View file

@@ -3,3 +3,4 @@ sentencepiece~=0.2.0
 transformers>=4.40.1,<5.0.0
 gguf>=0.1.0
 protobuf>=4.21.0,<5.0.0
+python-frontmatter~=1.0.1