convert-*.py: encoding_scheme --> output_type
parent 4d5f18a0e6
commit 5c263cb257
3 changed files with 14 additions and 14 deletions
@@ -127,7 +127,7 @@ class Model:
         self.model_name = Model.get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch)

         # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
-        encoding_scheme = self.ftype.name.partition("_")[2]
+        output_type = self.ftype.name.partition("_")[2]

         # Get Expert Count From huggingface_parameters
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
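A quick aside on the extraction step above (illustrative, not part of the patch): str.partition splits on the first underscore, so a gguf.LlamaFileType member name such as ALL_F32 or MOSTLY_Q8_0 reduces to its precision label.

for name in ("ALL_F32", "MOSTLY_F16", "MOSTLY_Q8_0"):
    # prints: ALL_F32 -> F32, MOSTLY_F16 -> F16, MOSTLY_Q8_0 -> Q8_0
    print(name, "->", name.partition("_")[2])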
@@ -135,12 +135,12 @@ class Model:
         weight_estimate = gguf.per_model_weight_count_estimation(model_tensors, expert_count)

         # Generate default filename based on model specification and available metadata
-        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, encoding_scheme)
+        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)

         # Filename Output
         if fname_out is not None:
             # custom defined filename and path was provided
-            self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, encoding_scheme)
+            self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, output_type)
         else:
             # output in the same directory as the model by default
             self.fname_out = dir_model.parent / self.fname_default
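To make the branch above easier to follow, here is a minimal, self-contained sketch of the output-path decision. pick_output_path and fill_template are hypothetical stand-ins for the class attributes and for gguf.fill_templated_filename, not code from the patch.

from pathlib import Path

def fill_template(name: str, output_type: str) -> str:
    # simplified stand-in for gguf.fill_templated_filename, handling only '{ftype}'
    return name.format(ftype=output_type.lower())

def pick_output_path(fname_out: Path | None, dir_model: Path, fname_default: str, output_type: str) -> Path:
    if fname_out is not None:
        # a custom filename may carry a type template
        return fname_out.parent / fill_template(fname_out.name, output_type)
    # otherwise the default name is placed next to the model directory
    return dir_model.parent / fname_default

print(pick_output_path(Path("out/model-{ftype}.gguf"), Path("models/foo"), "foo-F16.gguf", "F16"))
# out/model-f16.gguf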
@@ -3642,7 +3642,7 @@ def main() -> None:
     hparams = Model.load_hparams(dir_model)

     with torch.inference_mode():
-        encoding_scheme = ftype_map[args.outtype]
+        output_type = ftype_map[args.outtype]
         model_architecture = hparams["architectures"][0]

         try:
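For context on ftype_map (its entries are not shown in this hunk, so the mapping below is an assumption for illustration only): it translates the --outtype CLI string into a gguf.LlamaFileType member, whose name the Model class then shortens as shown earlier.

import gguf

ftype_map = {  # assumed entries; the real table lives in main()
    "f32": gguf.LlamaFileType.ALL_F32,
    "f16": gguf.LlamaFileType.MOSTLY_F16,
    "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
}
output_type = ftype_map["f16"]
print(output_type.name.partition("_")[2])  # F16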
@@ -3651,8 +3651,8 @@ def main() -> None:
             logger.error(f"Model {hparams['architectures'][0]} is not supported")
             sys.exit(1)

-        model_instance = model_class(dir_model, encodingScheme, fname_out, args.bigendian, args.use_temp_file,
-                                     args.no_lazy, args.model_name, split_max_tensors=args.split_max_tensors,
+        model_instance = model_class(dir_model, output_type, fname_out, args.bigendian, args.use_temp_file, args.no_lazy,
+                                     metadata, args.model_name, split_max_tensors=args.split_max_tensors,
                                      split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
                                      small_first_shard=args.no_tensor_first_split)

@@ -1198,13 +1198,13 @@ def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_c
     finetune = metadata.finetune if metadata is not None and metadata.finetune is not None else None
     version = metadata.version if metadata is not None and metadata.version is not None else None

-    encodingScheme = {
+    output_type = {
         GGMLFileType.AllF32: "F32",
         GGMLFileType.MostlyF16: "F16",
         GGMLFileType.MostlyQ8_0: "Q8_0",
     }[file_type]

-    return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, encodingScheme)
+    return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type)


 def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path:
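The legacy converter keeps its own GGMLFileType enum and maps it to the same short strings. A self-contained sketch of that lookup, using a stand-in enum (member values here are illustrative, not taken from the patch):

from enum import IntEnum

class GGMLFileType(IntEnum):  # stand-in for the converter's enum
    AllF32 = 0
    MostlyF16 = 1
    MostlyQ8_0 = 7

def output_type_for(file_type: GGMLFileType) -> str:
    # same lookup as in default_convention_outfile above
    return {
        GGMLFileType.AllF32: "F32",
        GGMLFileType.MostlyF16: "F16",
        GGMLFileType.MostlyQ8_0: "Q8_0",
    }[file_type]

print(output_type_for(GGMLFileType.MostlyQ8_0))  # Q8_0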
@@ -6,10 +6,10 @@ if TYPE_CHECKING:
     from torch import Tensor


-def fill_templated_filename(filename: str, encoding_scheme: str):
+def fill_templated_filename(filename: str, output_type: str):
     # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
-    ftype_uppercase: str = encoding_scheme.upper()
-    ftype_lowercase: str = encoding_scheme.lower()
+    ftype_uppercase: str = output_type.upper()
+    ftype_lowercase: str = output_type.lower()
     return filename.format(ftype_lowercase,
                            outtype=ftype_lowercase, ftype=ftype_lowercase,
                            OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
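Given the body shown above, fill_templated_filename resolves every supported template key ({}, {ftype}, {outtype}, {FTYPE}, {OUTTYPE}) from the single output_type argument, for example:

import gguf

print(gguf.fill_templated_filename("some-model-name.{ftype}.gguf", "F16"))
# some-model-name.f16.gguf
print(gguf.fill_templated_filename("some-model-name-{OUTTYPE}.gguf", "q8_0"))
# some-model-name-Q8_0.gguf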
@@ -65,7 +65,7 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str:
     return f"{round(scaled_model_params)}{scale_suffix}"


-def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, encoding_scheme: str) -> str:
+def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str:
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention

     if base_name is not None:
@@ -85,6 +85,6 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers

     version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""

-    encoding = f"-{encoding_scheme.strip().replace(' ', '-').upper()}"
+    precision = f"-{output_type.strip().replace(' ', '-').upper()}"

-    return f"{name}{parameters}{finetune}{version}{encoding}"
+    return f"{name}{parameters}{finetune}{version}{precision}"
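With the rename, the precision suffix that naming_convention appends comes straight from output_type. The two fragments visible in this hunk behave as below; the surrounding name, parameters and finetune parts are built earlier in the function and are not shown here.

version_string = "v0.1"
output_type = "q8_0"

version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
precision = f"-{output_type.strip().replace(' ', '-').upper()}"

print(version + precision)  # -v0.1-Q8_0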