convert-*.py: add --get-outfile command and refactor
parent d3a936fd0e, commit dbb1b471e4
4 changed files with 240 additions and 87 deletions
@@ -13,7 +13,8 @@ import sys
 from enum import IntEnum
 from pathlib import Path
 from hashlib import sha256
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast
+from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast, Optional
+from dataclasses import dataclass

 import math
 import numpy as np
@@ -29,6 +30,42 @@ import gguf
 logger = logging.getLogger("hf-to-gguf")


+@dataclass
+class Metadata:
+    name: Optional[str] = None
+    author: Optional[str] = None
+    version: Optional[str] = None
+    url: Optional[str] = None
+    description: Optional[str] = None
+    licence: Optional[str] = None
+    source_url: Optional[str] = None
+    source_hf_repo: Optional[str] = None
+
+    @staticmethod
+    def load(metadata_path: Path) -> Metadata:
+        if metadata_path is None or not metadata_path.exists():
+            return Metadata()
+
+        with open(metadata_path, 'r') as file:
+            data = json.load(file)
+
+        # Create a new Metadata instance
+        metadata = Metadata()
+
+        # Assigning values to Metadata attributes if they exist in the JSON file
+        # This is based on LLM_KV_NAMES mapping in llama.cpp
+        metadata.name = data.get("general.name")
+        metadata.author = data.get("general.author")
+        metadata.version = data.get("general.version")
+        metadata.url = data.get("general.url")
+        metadata.description = data.get("general.description")
+        metadata.license = data.get("general.license")
+        metadata.source_url = data.get("general.source.url")
+        metadata.source_hf_repo = data.get("general.source.huggingface.repository")
+
+        return metadata
+
+
 ###### MODEL DEFINITIONS ######

 class SentencePieceTokenTypes(IntEnum):
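For illustration, a minimal sketch of a metadata file as Metadata.load() above would consume it. The file name and field values are invented, and the Metadata class from this diff is assumed to be in scope; keys missing from the JSON simply stay None.

# Hypothetical usage sketch of the Metadata class added above; path and values are examples only.
import json
from pathlib import Path

metadata_path = Path("my-model-metadata.json")   # assumed example path
metadata_path.write_text(json.dumps({
    "general.name": "Example Tiny Model",        # -> metadata.name
    "general.author": "Jane Doe",                # -> metadata.author
    "general.version": "v1.0",                   # -> metadata.version
}))

metadata = Metadata.load(metadata_path)
print(metadata.name, metadata.version)           # Example Tiny Model v1.0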
@@ -46,13 +83,13 @@ AnyModel = TypeVar("AnyModel", bound="type[Model]")
 class Model:
     _model_classes: dict[str, type[Model]] = {}

+    model_name: str
     dir_model: Path
     ftype: gguf.LlamaFileType
     is_big_endian: bool
     endianess: gguf.GGUFEndian
     use_temp_file: bool
     lazy: bool
-    model_name: str | None
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
@@ -60,12 +97,14 @@ class Model:
     tensor_map: gguf.TensorNameMap
     tensor_names: set[str] | None
     fname_out: Path
+    fname_default: Path
     gguf_writer: gguf.GGUFWriter
+    metadata: Metadata

     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH

-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool,
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, metadata: Metadata,
                  model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
@@ -84,15 +123,20 @@ class Model:
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
         self.tensor_names = None
+        self.metadata = metadata
+
+        model_tensors = self.get_tensors()
+
         if self.ftype == gguf.LlamaFileType.GUESSED:
             # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
-            _, first_tensor = next(self.get_tensors())
+            _, first_tensor = next(model_tensors)
             if first_tensor.dtype == torch.float16:
                 logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
                 self.ftype = gguf.LlamaFileType.MOSTLY_F16
             else:
                 logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
                 self.ftype = gguf.LlamaFileType.MOSTLY_BF16

         ftype_up: str = self.ftype.name.partition("_")[2].upper()
         ftype_lw: str = ftype_up.lower()
         # allow templating the file name with the output ftype, useful with the "auto" ftype
@@ -100,6 +144,84 @@ class Model:
         self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
                                            split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)

+        # Update any missing authorship metadata with huggingface_parameters
+        if self.metadata is not None and self.metadata.source_hf_repo is None:
+            if self.hparams is not None and "_name_or_path" in self.hparams:
+                self.metadata.source_hf_repo = self.hparams["_name_or_path"]
+
+        # Set model name based on latest metadata either provided or calculated from environment
+        def get_model_name(metadata, huggingface_parameters, dir_model, model_arch):
+            if metadata is not None and metadata.name is not None:
+                # Explicit Metadata Was Provided By User
+                return metadata.name
+            elif huggingface_parameters is not None and "_name_or_path" in huggingface_parameters:
+                # Hugging Face Parameters Model Name or Model Folder Name is Provided
+                return huggingface_parameters["_name_or_path"]
+            elif huggingface_parameters is not None and "model_type" in huggingface_parameters:
+                # Hugging Face Parameters Model Type is Provided
+                return huggingface_parameters["model_type"]
+            elif dir_model is not None and dir_model.name is not None:
+                # Use directory folder name
+                return dir_model.name
+            else:
+                return gguf.MODEL_ARCH_NAMES[model_arch]
+        self.model_name = get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch)
+
+        # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
+        encodingScheme = self.ftype.name.partition("_")[2]
+
+        # Get Expert Count From huggingface_parameters
+        expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
+
+        def per_model_weight_count_estimation(tensors, expert_count):
+            # TODO: Ensure parameter count is accurate throughout various model type
+            # May currently overestimate parameter count in Mamba model because
+            # output weights is tied with token embeddings.
+            sum_weight_estimate = 0
+            for name, data_torch in tensors:
+                # Got A Tensor
+
+                # We don't need these
+                if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+                    continue
+
+                # Calculate Tensor Volume
+                sum_weights_in_tensor = 1
+                for dim in data_torch.shape:
+                    sum_weights_in_tensor *= dim
+
+                # Add Tensor Volume To Running Count
+                sum_weight_estimate += sum_weights_in_tensor
+
+            # Calculate weight estimate per model
+            per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
+
+            return per_model_weight_estimate
+
+        weight_estimate = per_model_weight_count_estimation(model_tensors, expert_count)
+
+        # Generate default filename based on model specification and available metadata
+        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.version, expert_count, weight_estimate, encodingScheme)
+
+        # Filename Output
+        if fname_out is not None:
+            # custom defined filename and path was provided
+            def fill_templated_filename(filename: str, encodingScheme: str):
+                # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
+                ftype_uppercase: str = encodingScheme.upper()
+                ftype_lowercase: str = encodingScheme.lower()
+                return filename.format(ftype_lowercase,
+                                       outtype=ftype_lowercase, ftype=ftype_lowercase,
+                                       OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
+
+            self.fname_out = fname_out.parent / fill_templated_filename(fname_out.name, encodingScheme)
+        else:
+            # output in the same directory as the model by default
+            self.fname_out = dir_model.parent / self.fname_default
+
+        # Configure GGUF Writer
+        self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
+
     @classmethod
     def __init_subclass__(cls):
         # can't use an abstract property, because overriding it without type errors
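A quick illustration of the output-name templating added above: fill_templated_filename is defined inline inside __init__, so it is copied here as a standalone sketch, and the file name passed to it is invented.

# Standalone copy of the templating helper from the hunk above, for illustration only.
def fill_templated_filename(filename: str, encodingScheme: str) -> str:
    ftype_uppercase = encodingScheme.upper()
    ftype_lowercase = encodingScheme.lower()
    return filename.format(ftype_lowercase,
                           outtype=ftype_lowercase, ftype=ftype_lowercase,
                           OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)

# An --outfile of 'some-model-name.{ftype}.gguf' with an F16 output type resolves to:
print(fill_templated_filename("some-model-name.{ftype}.gguf", "F16"))  # some-model-name.f16.gguf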
@@ -185,8 +307,26 @@ class Model:
                 raise ValueError(f"Can not map tensor {name!r}")
         return new_name

+    def set_gguf_meta_model(self):
+        self.gguf_writer.add_name(self.model_name)
+
+        if self.metadata is not None:
+            if self.metadata.author is not None:
+                self.gguf_writer.add_author(self.metadata.author)
+            if self.metadata.version is not None:
+                self.gguf_writer.add_version(self.metadata.version)
+            if self.metadata.url is not None:
+                self.gguf_writer.add_url(self.metadata.url)
+            if self.metadata.description is not None:
+                self.gguf_writer.add_description(self.metadata.description)
+            if self.metadata.licence is not None:
+                self.gguf_writer.add_licence(self.metadata.licence)
+            if self.metadata.source_url is not None:
+                self.gguf_writer.add_source_url(self.metadata.source_url)
+            if self.metadata.source_hf_repo is not None:
+                self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo)
+
     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_block_count(self.block_count)

         if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
@@ -773,7 +913,6 @@ class GPTNeoXModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["num_hidden_layers"]

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -829,7 +968,6 @@ class BloomModel(Model):
     model_arch = gguf.MODEL_ARCH.BLOOM

     def set_gguf_parameters(self):
-        self.gguf_writer.add_name("Bloom")
         n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
         n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
         self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed))
@@ -906,7 +1044,6 @@ class MPTModel(Model):

     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
         self.gguf_writer.add_embedding_length(self.hparams["d_model"])
         self.gguf_writer.add_block_count(block_count)
@@ -945,7 +1082,6 @@ class OrionModel(Model):
         block_count = self.hparams["num_hidden_layers"]
         head_count = self.hparams["num_attention_heads"]
         head_count_kv = self.hparams.get("num_key_value_heads", head_count)
-        hf_repo = self.hparams.get("_name_or_path", "")

         ctx_length = 0
         if "max_sequence_length" in self.hparams:
@@ -958,8 +1094,6 @@ class OrionModel(Model):
             raise ValueError("gguf: can not find ctx length parameter.")

         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
-        self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
@@ -983,7 +1117,6 @@ class BaichuanModel(Model):
         block_count = self.hparams["num_hidden_layers"]
         head_count = self.hparams["num_attention_heads"]
         head_count_kv = self.hparams.get("num_key_value_heads", head_count)
-        hf_repo = self.hparams.get("_name_or_path", "")

         ctx_length = 0
         if "max_sequence_length" in self.hparams:
@@ -995,8 +1128,6 @@ class BaichuanModel(Model):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
-        self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
@@ -1110,7 +1241,6 @@ class XverseModel(Model):
         block_count = self.hparams["num_hidden_layers"]
         head_count = self.hparams["num_attention_heads"]
         head_count_kv = self.hparams.get("num_key_value_heads", head_count)
-        hf_repo = self.hparams.get("_name_or_path", "")

         ctx_length = 0
         if "max_sequence_length" in self.hparams:
@@ -1122,8 +1252,6 @@ class XverseModel(Model):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
-        self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
@@ -1182,7 +1310,6 @@ class FalconModel(Model):
         if n_head_kv is None:
             n_head_kv = self.hparams.get("n_head_kv", 1)  # old name

-        self.gguf_writer.add_name("Falcon")
         self.gguf_writer.add_context_length(2048)  # not in config.json
         self.gguf_writer.add_tensor_data_layout("jploski")  # qkv tensor transform
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
@@ -1227,7 +1354,6 @@ class StarCoderModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layer"]

-        self.gguf_writer.add_name("StarCoder")
         self.gguf_writer.add_context_length(self.hparams["n_positions"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
         self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
@@ -1262,7 +1388,6 @@ class RefactModel(Model):

         block_count = self.hparams["n_layer"]

-        self.gguf_writer.add_name("Refact")
         # refact uses Alibi. So this is from config.json which might be used by training.
         self.gguf_writer.add_context_length(self.hparams["n_positions"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
@@ -1317,7 +1442,6 @@ class StableLMModel(Model):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -1560,7 +1684,6 @@ class GrokModel(Model):

     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        self.gguf_writer.add_name("Grok")

     _experts: list[dict[str, Tensor]] | None = None

@@ -1609,7 +1732,6 @@ class DbrxModel(Model):
     def set_gguf_parameters(self):
         ffn_config = self.hparams["ffn_config"]
         attn_config = self.hparams["attn_config"]
-        self.gguf_writer.add_name(self.hparams["model_type"])
         self.gguf_writer.add_block_count(self.hparams["n_layers"])

         self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
@@ -1678,7 +1800,6 @@ class MiniCPMModel(Model):

     def set_gguf_parameters(self):
         block_count = self.hparams["num_hidden_layers"]
-        self.gguf_writer.add_name("MiniCPM")
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -1748,7 +1869,6 @@ class QwenModel(Model):
         self._set_vocab_qwen()

     def set_gguf_parameters(self):
-        self.gguf_writer.add_name("Qwen")
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
@@ -1839,7 +1959,6 @@ class GPT2Model(Model):
     model_arch = gguf.MODEL_ARCH.GPT2

     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_block_count(self.hparams["n_layer"])
         self.gguf_writer.add_context_length(self.hparams["n_ctx"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
@@ -1882,7 +2001,6 @@ class Phi2Model(Model):
         n_embd = self.find_hparam(["hidden_size", "n_embd"])
         n_head = self.find_hparam(["num_attention_heads", "n_head"])

-        self.gguf_writer.add_name("Phi2")
         self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))

         self.gguf_writer.add_embedding_length(n_embd)
@@ -2004,7 +2122,6 @@ class Phi3MiniModel(Model):
         orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
         rope_dims = n_embd // n_head

-        self.gguf_writer.add_name("Phi3")
         self.gguf_writer.add_context_length(max_pos_embds)
         self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds)
         self.gguf_writer.add_embedding_length(n_embd)
@@ -2061,7 +2178,6 @@ class PlamoModel(Model):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]

-        self.gguf_writer.add_name("PLaMo")
         self.gguf_writer.add_context_length(4096)  # not in config.json
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
@@ -2106,7 +2222,6 @@ class CodeShellModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layer"]

-        self.gguf_writer.add_name("CodeShell")
         self.gguf_writer.add_context_length(self.hparams["n_positions"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
         self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
@@ -2265,7 +2380,6 @@ class InternLM2Model(Model):
         special_vocab.add_to_gguf(self.gguf_writer)

     def set_gguf_parameters(self):
-        self.gguf_writer.add_name("InternLM2")
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
@@ -2433,7 +2547,6 @@ class GemmaModel(Model):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -2549,7 +2662,6 @@ class MambaModel(Model):
         # Fail early for models which don't have a block expansion factor of 2
         assert d_inner == 2 * d_model

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(2**20)  # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(d_model)
         self.gguf_writer.add_feed_forward_length(0)  # unused, but seemingly required when loading
@@ -3505,6 +3617,14 @@ def parse_args() -> argparse.Namespace:
         "--no-tensor-first-split", action="store_true",
         help="do not add tensors to the first split (disabled by default)"
     )
+    parser.add_argument(
+        "--metadata", type=Path,
+        help="Specify the path for a metadata file"
+    )
+    parser.add_argument(
+        "--get-outfile", action="store_true",
+        help="get calculated default outfile name"
+    )

     return parser.parse_args()

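A hedged sketch of how the two new options surface in the parsed arguments: the parser below is a trimmed stand-in for the script's full parse_args(), and the metadata path is invented.

# Minimal stand-in parser showing only the two new options added above.
import argparse
from pathlib import Path

parser = argparse.ArgumentParser()
parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file")
parser.add_argument("--get-outfile", action="store_true", help="get calculated default outfile name")

args = parser.parse_args(["--metadata", "my-model-metadata.json", "--get-outfile"])
print(args.metadata)     # my-model-metadata.json (a Path)
print(args.get_outfile)  # True; argparse maps --get-outfile to the get_outfile attribute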
@@ -3530,8 +3650,15 @@ def split_str_to_n_bytes(split_str: str) -> int:
 def main() -> None:
     args = parse_args()

-    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    elif args.get_outfile:
+        # Avoid printing anything besides the dump output
+        logging.basicConfig(level=logging.WARNING)
+    else:
+        logging.basicConfig(level=logging.INFO)

+    metadata = Metadata.load(args.metadata)
     dir_model = args.model

     if not dir_model.is_dir():
@@ -3562,17 +3689,27 @@ def main() -> None:
     hparams = Model.load_hparams(dir_model)

     with torch.inference_mode():
+        encodingScheme = ftype_map[args.outtype]
+        model_architecture = hparams["architectures"][0]
+
         try:
-            model_class = Model.from_model_architecture(hparams["architectures"][0])
+            model_class = Model.from_model_architecture(model_architecture)
         except NotImplementedError:
             logger.error(f"Model {hparams['architectures'][0]} is not supported")
             sys.exit(1)

-        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file,
+        model_instance = model_class(dir_model, encodingScheme, fname_out, args.bigendian, args.use_temp_file,
                                      args.no_lazy, args.model_name, split_max_tensors=args.split_max_tensors,
                                      split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
                                      small_first_shard=args.no_tensor_first_split)

+        if args.get_outfile:
+            print(f"{model_instance.fname_default}")  # noqa: NP100
+            return
+
+        logger.info("Set meta model")
+        model_instance.set_gguf_meta_model()
+
         logger.info("Set model parameters")
         model_instance.gguf_writer.add_type(gguf.GGUFType.MODEL)
         model_instance.set_gguf_parameters()
@@ -1021,35 +1021,28 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
     raise ValueError(f"Unexpected combination of types: {name_to_type}")


-def model_parameter_count(model: LazyModel) -> int:
-    total_model_parameters = 0
-    for i, (name, lazy_tensor) in enumerate(model.items()):
+def per_model_weight_count_estimation(model: LazyModel, expert_count:int) -> int:
+    # TODO: Ensure parameter count is accurate throughout various model type
+    sum_weight_estimate = 0
+    for name, lazy_tensor in model.items():
+        # We don't need these
+        if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+            continue
+
+        # Got A Tensor
         sum_weights_in_tensor = 1
+
+        # Tensor Volume
         for dim in lazy_tensor.shape:
             sum_weights_in_tensor *= dim
-        total_model_parameters += sum_weights_in_tensor
-    return total_model_parameters
-
-
-def model_parameter_count_rounded_notation(model_params_count: int) -> str:
-    if model_params_count > 1e12 :
-        # Trillions Of Parameters
-        scaled_model_params = model_params_count * 1e-12
-        scale_suffix = "T"
-    elif model_params_count > 1e9 :
-        # Billions Of Parameters
-        scaled_model_params = model_params_count * 1e-9
-        scale_suffix = "B"
-    elif model_params_count > 1e6 :
-        # Millions Of Parameters
-        scaled_model_params = model_params_count * 1e-6
-        scale_suffix = "M"
-    else:
-        # Thousands Of Parameters
-        scaled_model_params = model_params_count * 1e-3
-        scale_suffix = "K"
-
-    return f"{round(scaled_model_params)}{scale_suffix}"
+
+        # Add Tensor Volume To Running Count
+        sum_weight_estimate += sum_weights_in_tensor
+
+    # Calculate weight estimate per model
+    per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
+
+    return per_model_weight_estimate


 def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
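To illustrate the renamed estimator, a small sketch with stand-in tensors: the FakeTensor class below only mimics the .shape attribute of the conversion script's lazy tensors, the tensor names and sizes are made up, and per_model_weight_count_estimation as defined in the hunk above is assumed to be in scope.

# Illustrative only: fake tensors with a .shape attribute stand in for the script's lazy tensors.
class FakeTensor:
    def __init__(self, shape):
        self.shape = shape

model = {
    "tok_embeddings.weight": FakeTensor((32000, 4096)),
    "layers.0.attention.wq.weight": FakeTensor((4096, 4096)),
    "layers.0.rotary_emb.inv_freq": FakeTensor((64,)),  # skipped by the name filter
}

# With no experts (expert_count=0) the estimate is just the summed tensor volumes.
print(per_model_weight_count_estimation(model, 0))  # 32000*4096 + 4096*4096 = 147849216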
@@ -1231,34 +1224,21 @@ class VocabFactory:
     return vocab, special_vocab


-def default_convention_outfile(file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> str:
-    quantization = {
+def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> str:
+    name = metadata.name if metadata is not None and metadata.name is not None else model_name
+    version = metadata.version if metadata is not None and metadata.version is not None else None
+
+    encodingScheme = {
         GGMLFileType.AllF32: "F32",
         GGMLFileType.MostlyF16: "F16",
         GGMLFileType.MostlyQ8_0: "Q8_0",
     }[file_type]

-    parameters = model_parameter_count_rounded_notation(model_params_count)
-
-    expert_count = ""
-    if params.n_experts is not None:
-        expert_count = f"{params.n_experts}x"
-
-    version = ""
-    if metadata is not None and metadata.version is not None:
-        version = f"-{metadata.version}"
-
-    name = "ggml-model"
-    if metadata is not None and metadata.name is not None:
-        name = metadata.name
-    elif params.path_model is not None:
-        name = params.path_model.name
-
-    return f"{name}{version}-{expert_count}{parameters}-{quantization}"
+    return gguf.naming_convention(name, version, expert_count, model_params_count, encodingScheme)


-def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
-    default_filename = default_convention_outfile(file_type, params, model_params_count, metadata)
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> Path:
+    default_filename = default_convention_outfile(file_type, model_name, expert_count, model_params_count, metadata)
     ret = model_paths[0].parent / f"{default_filename}.gguf"
     if ret in model_paths:
         logger.error(
@@ -1315,10 +1295,10 @@ def main(args_in: list[str] | None = None) -> None:
     if args.get_outfile:
         model_plus = load_some_model(args.model)
         params = Params.load(model_plus)
         model = convert_model_names(model_plus.model, params, args.skip_unknown)
-        model_params_count = model_parameter_count(model_plus.model)
+        model_params_count = per_model_weight_count_estimation(model_plus.model, params.n_experts)
         ftype = pick_output_type(model, args.outtype)
-        print(f"{default_convention_outfile(ftype, params, model_params_count, metadata)}") # noqa: NP100
+        print(f"{default_convention_outfile(ftype, params.path_model.name, params.n_experts, model_params_count, metadata)}") # noqa: NP100
         return

     if args.no_vocab and args.vocab_only:
@@ -1334,8 +1314,8 @@ def main(args_in: list[str] | None = None) -> None:
     else:
         model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)

-    model_params_count = model_parameter_count(model_plus.model)
-    logger.info(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
+    model_params_count = per_model_weight_count_estimation(model_plus.model, params.n_experts)
+    logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})")

     if args.dump:
         do_dump_model(model_plus)
@@ -1405,7 +1385,7 @@ def main(args_in: list[str] | None = None) -> None:
     model = convert_model_names(model, params, args.skip_unknown)
     ftype = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata)

     params.ftype = ftype
     logger.info(f"Writing {outfile}, format {ftype}")
@@ -5,3 +5,4 @@ from .gguf_writer import *
 from .quants import *
 from .tensor_mapping import *
 from .vocab import *
+from .utility import *
gguf-py/gguf/utility.py (new file, 35 lines)
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+
+def model_weight_count_rounded_notation(model_params_count: int) -> str:
+    if model_params_count > 1e15 :
+        # Quadrillion Of Parameters
+        scaled_model_params = model_params_count * 1e-15
+        scale_suffix = "Q"
+    elif model_params_count > 1e12 :
+        # Trillions Of Parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9 :
+        # Billions Of Parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6 :
+        # Millions Of Parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands Of Parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+    return f"{round(scaled_model_params)}{scale_suffix}"
+
+
+def naming_convention(model_name: str, version_string:str, expert_count_int:int, model_params_count: int, encodingScheme: str) -> str:
+    # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
+    name = model_name.strip().replace(' ', '-') if model_name is not None else "ggml-model"
+    version = f"-{version_string}" if version_string is not None else ""
+    expert_count_chunk = f"{expert_count_int}x" if expert_count_int is not None and expert_count_int > 0 else ""
+    parameters = model_weight_count_rounded_notation(model_params_count)
+    encodingScheme = encodingScheme.upper()
+    return f"{name}{version}-{expert_count_chunk}{parameters}-{encodingScheme}"
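As a usage sketch of the new helpers, assuming the gguf-py package with this utility module is importable: the model name, version, expert count, and parameter count below are illustrative values, not taken from any real model.

# Illustrative values only; assumes the updated gguf-py package is on the import path.
import gguf

print(gguf.model_weight_count_rounded_notation(7_000_000_000))            # 7B
print(gguf.naming_convention("Mixtral", "v0.1", 8, 7_000_000_000, "F16"))  # Mixtral-v0.1-8x7B-F16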