convert-*.py: encoding_scheme --> output_type

Author: brian khuu 2024-06-02 01:58:47 +10:00
Parent: 4d5f18a0e6
Commit: 5c263cb257
3 changed files with 14 additions and 14 deletions


@@ -127,7 +127,7 @@ class Model:
         self.model_name = Model.get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch)

         # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
-        encoding_scheme = self.ftype.name.partition("_")[2]
+        output_type = self.ftype.name.partition("_")[2]

         # Get Expert Count From huggingface_parameters
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
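
The extraction renamed above leans on str.partition splitting at the first underscore only, which is what keeps multi-part quantization names such as Q8_0 in one piece. A minimal sketch of that behaviour, using plain strings in place of the real gguf.LlamaFileType member names quoted in the comment:

    # str.partition("_") returns (head, sep, tail); index [2] is everything after the first "_"
    for name in ("ALL_F32", "MOSTLY_F16", "MOSTLY_Q8_0"):
        output_type = name.partition("_")[2]
        print(name, "-->", output_type)  # F32, F16, Q8_0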
@@ -135,12 +135,12 @@ class Model:
         weight_estimate = gguf.per_model_weight_count_estimation(model_tensors, expert_count)

         # Generate default filename based on model specification and available metadata
-        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, encoding_scheme)
+        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)

         # Filename Output
         if fname_out is not None:
             # custom defined filename and path was provided
-            self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, encoding_scheme)
+            self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, output_type)
         else:
             # output in the same directory as the model by default
             self.fname_out = dir_model.parent / self.fname_default
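
The two renamed call sites above implement the filename fallback: an explicit fname_out keeps its directory and only has any type template in its name filled in, while the default name lands next to the model directory. A sketch of that branch with hypothetical paths (using str.format directly in place of gguf.fill_templated_filename, which this commit also touches below):

    from pathlib import Path

    fname_out = Path("out/my-model-{ftype}.gguf")  # hypothetical --outfile value
    fname_default = "my-model-7B-F16"              # hypothetical naming_convention() result

    # custom filename: keep the parent directory, fill the template in the name
    print(fname_out.parent / fname_out.name.format(ftype="f16"))  # out/my-model-f16.gguf

    # default: place the generated name next to the model directory
    dir_model = Path("models/my-model")
    print(dir_model.parent / fname_default)  # models/my-model-7B-F16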
@@ -3642,7 +3642,7 @@ def main() -> None:
     hparams = Model.load_hparams(dir_model)

     with torch.inference_mode():
-        encoding_scheme = ftype_map[args.outtype]
+        output_type = ftype_map[args.outtype]
         model_architecture = hparams["architectures"][0]

         try:
@@ -3651,8 +3651,8 @@ def main() -> None:
             logger.error(f"Model {hparams['architectures'][0]} is not supported")
             sys.exit(1)

-        model_instance = model_class(dir_model, encodingScheme, fname_out, args.bigendian, args.use_temp_file,
-                                     args.no_lazy, args.model_name, split_max_tensors=args.split_max_tensors,
+        model_instance = model_class(dir_model, output_type, fname_out, args.bigendian, args.use_temp_file, args.no_lazy,
+                                     metadata, args.model_name, split_max_tensors=args.split_max_tensors,
                                      split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
                                      small_first_shard=args.no_tensor_first_split)



@@ -1198,13 +1198,13 @@ def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_c
     finetune = metadata.finetune if metadata is not None and metadata.finetune is not None else None
     version = metadata.version if metadata is not None and metadata.version is not None else None

-    encodingScheme = {
+    output_type = {
         GGMLFileType.AllF32: "F32",
         GGMLFileType.MostlyF16: "F16",
         GGMLFileType.MostlyQ8_0: "Q8_0",
     }[file_type]

-    return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, encodingScheme)
+    return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type)


 def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path:
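
output_type here comes from a bare dict lookup, so a GGMLFileType with no entry fails loudly with a KeyError instead of silently producing an unlabeled filename. A self-contained sketch with a stand-in enum (the real GGMLFileType lives in the converter; the member values below are assumptions):

    from enum import IntEnum

    class GGMLFileType(IntEnum):  # stand-in enum; member values are assumptions
        AllF32 = 0
        MostlyF16 = 1
        MostlyQ8_0 = 7

    output_type = {
        GGMLFileType.AllF32: "F32",
        GGMLFileType.MostlyF16: "F16",
        GGMLFileType.MostlyQ8_0: "Q8_0",
    }[GGMLFileType.MostlyF16]
    print(output_type)  # F16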


@@ -6,10 +6,10 @@ if TYPE_CHECKING:
     from torch import Tensor


-def fill_templated_filename(filename: str, encoding_scheme: str):
+def fill_templated_filename(filename: str, output_type: str):
     # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
-    ftype_uppercase: str = encoding_scheme.upper()
-    ftype_lowercase: str = encoding_scheme.lower()
+    ftype_uppercase: str = output_type.upper()
+    ftype_lowercase: str = output_type.lower()
     return filename.format(ftype_lowercase,
                            outtype=ftype_lowercase, ftype=ftype_lowercase,
                            OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
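
Only the parameter name changes in this hunk; the template filling itself is untouched. Restating the function as shown and exercising it (the model names are made up):

    def fill_templated_filename(filename: str, output_type: str):
        # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
        ftype_uppercase: str = output_type.upper()
        ftype_lowercase: str = output_type.lower()
        return filename.format(ftype_lowercase,
                               outtype=ftype_lowercase, ftype=ftype_lowercase,
                               OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)

    print(fill_templated_filename("some-model-name.{ftype}.gguf", "F16"))    # some-model-name.f16.gguf
    print(fill_templated_filename("some-model-name.{OUTTYPE}.gguf", "q8_0")) # some-model-name.Q8_0.gguf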
@@ -65,7 +65,7 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str:
     return f"{round(scaled_model_params)}{scale_suffix}"


-def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, encoding_scheme: str) -> str:
+def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str:
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention

     if base_name is not None:
@@ -85,6 +85,6 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
     version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""

-    encoding = f"-{encoding_scheme.strip().replace(' ', '-').upper()}"
+    precision = f"-{output_type.strip().replace(' ', '-').upper()}"

-    return f"{name}{parameters}{finetune}{version}{encoding}"
+    return f"{name}{parameters}{finetune}{version}{precision}"
