From 5c263cb257b7f3132c7b46bc36182a6a29ad6e13 Mon Sep 17 00:00:00 2001
From: brian khuu
Date: Sun, 2 Jun 2024 01:58:47 +1000
Subject: [PATCH] convert-*.py: encoding_scheme --> output_type

---
 convert_hf_to_gguf.py            | 12 ++++++------
 examples/convert_legacy_llama.py |  4 ++--
 gguf-py/gguf/utility.py          | 12 ++++++------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index d3c1e4c0c..296895449 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -127,7 +127,7 @@ class Model:
         self.model_name = Model.get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch)
 
         # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
-        encoding_scheme = self.ftype.name.partition("_")[2]
+        output_type = self.ftype.name.partition("_")[2]
 
         # Get Expert Count From huggingface_parameters
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
@@ -135,12 +135,12 @@ class Model:
         weight_estimate = gguf.per_model_weight_count_estimation(model_tensors, expert_count)
 
         # Generate default filename based on model specification and available metadata
-        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, encoding_scheme)
+        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type)
 
         # Filename Output
         if fname_out is not None:
             # custom defined filename and path was provided
-            self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, encoding_scheme)
+            self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, output_type)
         else:
             # output in the same directory as the model by default
             self.fname_out = dir_model.parent / self.fname_default
@@ -3642,7 +3642,7 @@ def main() -> None:
     hparams = Model.load_hparams(dir_model)
 
     with torch.inference_mode():
-        encoding_scheme = ftype_map[args.outtype]
+        output_type = ftype_map[args.outtype]
         model_architecture = hparams["architectures"][0]
 
         try:
@@ -3651,8 +3651,8 @@ def main() -> None:
             logger.error(f"Model {hparams['architectures'][0]} is not supported")
             sys.exit(1)
 
-        model_instance = model_class(dir_model, encodingScheme, fname_out, args.bigendian, args.use_temp_file,
-                                     args.no_lazy, args.model_name, split_max_tensors=args.split_max_tensors,
+        model_instance = model_class(dir_model, output_type, fname_out, args.bigendian, args.use_temp_file, args.no_lazy,
+                                     metadata, args.model_name, split_max_tensors=args.split_max_tensors,
                                      split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
                                      small_first_shard=args.no_tensor_first_split)
 
diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py
index 38d1745f5..fe18d5970 100755
--- a/examples/convert_legacy_llama.py
+++ b/examples/convert_legacy_llama.py
@@ -1198,13 +1198,13 @@ def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_c
     finetune = metadata.finetune if metadata is not None and metadata.finetune is not None else None
     version = metadata.version if metadata is not None and metadata.version is not None else None
 
-    encodingScheme = {
+    output_type = {
        GGMLFileType.AllF32: "F32",
        GGMLFileType.MostlyF16: "F16",
        GGMLFileType.MostlyQ8_0: "Q8_0",
    }[file_type]
 
-    return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, encodingScheme)
+    return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type)
 
 
 def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path:
diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py
index 3a6046277..2a52d1273 100644
--- a/gguf-py/gguf/utility.py
+++ b/gguf-py/gguf/utility.py
@@ -6,10 +6,10 @@ if TYPE_CHECKING:
     from torch import Tensor
 
 
-def fill_templated_filename(filename: str, encoding_scheme: str):
+def fill_templated_filename(filename: str, output_type: str):
     # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
-    ftype_uppercase: str = encoding_scheme.upper()
-    ftype_lowercase: str = encoding_scheme.lower()
+    ftype_uppercase: str = output_type.upper()
+    ftype_lowercase: str = output_type.lower()
     return filename.format(ftype_lowercase,
                            outtype=ftype_lowercase, ftype=ftype_lowercase,
                            OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
@@ -65,7 +65,7 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str:
     return f"{round(scaled_model_params)}{scale_suffix}"
 
 
-def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, encoding_scheme: str) -> str:
+def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str:
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
 
     if base_name is not None:
@@ -85,6 +85,6 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers
 
     version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
 
-    encoding = f"-{encoding_scheme.strip().replace(' ', '-').upper()}"
+    precision = f"-{output_type.strip().replace(' ', '-').upper()}"
 
-    return f"{name}{parameters}{finetune}{version}{encoding}"
+    return f"{name}{parameters}{finetune}{version}{precision}"
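
Reviewer note: the sketch below is not part of the patch. It is a minimal, hypothetical usage example of how the renamed output_type argument flows through the two gguf-py helpers touched above; the model name, version, and parameter values are made up, and only the signatures and behaviour visible in this diff are assumed.

    # Sketch only: assumes the post-patch signatures of gguf.naming_convention()
    # and gguf.fill_templated_filename() as shown in the diff above.
    import gguf

    # Same derivation as in Model.__init__: 'MOSTLY_F16' -> 'F16'
    output_type = gguf.LlamaFileType.MOSTLY_F16.name.partition("_")[2]

    # Default output filename built from model metadata (example values are hypothetical).
    fname_default = gguf.naming_convention(
        model_name="TinyLlama",
        base_name=None,
        finetune_string="Chat",
        version_string="v1.0",
        expert_count_int=None,
        model_params_count=1_100_000_000,
        output_type=output_type,
    )
    # For these inputs the result ends in "-v1.0-F16"; the leading name/size
    # segments come from helpers not shown in this diff.

    # User-supplied templated path: '{ftype}' is filled with the lowercase type.
    fname_out = gguf.fill_templated_filename("tinyllama-chat.{ftype}.gguf", output_type)
    # -> 'tinyllama-chat.f16.gguf'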