convert-*.py: enable --model-name direct metadata override

brian khuu 2024-06-02 23:56:04 +10:00
parent b1927eed82
commit f7c20793b9
3 changed files with 27 additions and 15 deletions
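
In effect, the model name now has three possible sources with a fixed precedence inside gguf.Metadata.load(): the model directory name is the default, a --metadata override file can replace it, and the new --model-name argument is applied last and wins over both. A minimal sketch of that precedence (a hypothetical helper for illustration, not code from this commit):

    from typing import Optional

    def resolve_model_name(dir_name: Optional[str],
                           override_file_name: Optional[str],
                           cli_model_name: Optional[str]) -> Optional[str]:
        # Lowest precedence: the model directory's name, if any.
        name = dir_name
        # A name in the --metadata override file replaces it.
        if override_file_name is not None:
            name = override_file_name
        # Highest precedence: the new --model-name CLI argument.
        if cli_model_name is not None:
            name = cli_model_name
        return name

    print(resolve_model_name("Example-7B", None, "Example-7B-Instruct"))  # Example-7B-Instruct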


@@ -3624,9 +3624,10 @@ def main() -> None:
     else:
         logging.basicConfig(level=logging.INFO)
 
+    model_name = args.model_name
     dir_model = args.model
-    metadata = gguf.Metadata.load(args.metadata, dir_model)
+    metadata = gguf.Metadata.load(args.metadata, dir_model, model_name)
 
     if not dir_model.is_dir():
         logger.error(f'Error: {args.model} is not a directory')


@@ -1195,8 +1195,8 @@ class VocabFactory:
     return vocab, special_vocab
 
 
-def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str:
-    name = metadata.name if metadata.name is not None else model_name
+def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str:
+    name = metadata.name if metadata.name is not None else None
     basename = metadata.basename if metadata.basename is not None else None
     finetune = metadata.finetune if metadata.finetune is not None else None
     version = metadata.version if metadata.version is not None else None
@@ -1210,8 +1210,8 @@ def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str:
     return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type)
 
 
-def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path:
-    default_filename = default_convention_outfile(file_type, model_name, expert_count, model_params_count, metadata)
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path:
+    default_filename = default_convention_outfile(file_type, expert_count, model_params_count, metadata)
     ret = model_paths[0].parent / f"{default_filename}.gguf"
     if ret in model_paths:
         logger.error(
@@ -1252,6 +1252,7 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
     parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file")
     parser.add_argument("--get-outfile", action="store_true", help="get calculated default outfile name")
+    parser.add_argument("--model-name", type=str, default=None, help="name of the model")
 
     args = parser.parse_args(args_in)
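
The flag itself is plain argparse: an optional string that defaults to None when omitted, so the converters can tell "no override requested" apart from an explicit name. A standalone sketch of just that behaviour (the add_argument line is taken from the hunk above; the rest of the script's parser is omitted):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model-name", type=str, default=None, help="name of the model")

    print(parser.parse_args([]).model_name)                              # None
    print(parser.parse_args(["--model-name", "Example-7B"]).model_name)  # Example-7B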
@@ -1263,9 +1264,10 @@ def main(args_in: list[str] | None = None) -> None:
     else:
         logging.basicConfig(level=logging.INFO)
 
+    model_name = args.model_name
     dir_model = args.model
-    metadata = gguf.Metadata.load(args.metadata)
+    metadata = gguf.Metadata.load(args.metadata, dir_model, model_name)
 
     if args.get_outfile:
         model_plus = load_some_model(dir_model)
@@ -1273,7 +1275,11 @@ def main(args_in: list[str] | None = None) -> None:
         model = convert_model_names(model_plus.model, params, args.skip_unknown)
         model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts)
         ftype = pick_output_type(model, args.outtype)
-        print(f"{default_convention_outfile(ftype, params.path_model.name, params.n_experts, model_params_count, metadata)}") # noqa: NP100
+
+        if metadata.name is None:
+            metadata.name = params.path_model.name
+
+        print(f"{default_convention_outfile(ftype, params.n_experts, model_params_count, metadata)}") # noqa: NP100
         return
 
     if args.no_vocab and args.vocab_only:
@@ -1354,13 +1360,16 @@ def main(args_in: list[str] | None = None) -> None:
 
     assert params is not None
 
+    if metadata.name is None:
+        metadata.name = params.path_model.name
+
     logger.info(f"Vocab info: {vocab}")
     logger.info(f"Special vocab info: {special_vocab}")
     model = model_plus.model
     model = convert_model_names(model, params, args.skip_unknown)
     ftype = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.n_experts, model_params_count, metadata=metadata)
 
     metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count)
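
Both output-name helpers in convert.py now take the name from the metadata object rather than a separate model_name parameter, so each call site first falls back to the model directory name when nothing else has set it. A self-contained sketch of that fallback (stand-in class and placeholder path, not the repo's types):

    from dataclasses import dataclass
    from pathlib import Path
    from typing import Optional

    @dataclass
    class MetadataStub:
        # Stand-in for gguf.Metadata, reduced to the one field this change touches.
        name: Optional[str] = None

    def apply_name_fallback(metadata: MetadataStub, model_dir: Path) -> MetadataStub:
        # Mirrors the new fallback in convert.py's main(): use the directory name
        # only if neither the override file nor --model-name provided one.
        if metadata.name is None:
            metadata.name = model_dir.name
        return metadata

    print(apply_name_fallback(MetadataStub(), Path("models/Example-7B")).name)                       # Example-7B
    print(apply_name_fallback(MetadataStub(name="Named-Already"), Path("models/Example-7B")).name)   # Named-Already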


@@ -3,10 +3,8 @@ from __future__ import annotations
 
 import json
 import frontmatter
 from pathlib import Path
 from typing import Optional
 from dataclasses import dataclass
 
 from .constants import Keys
@@ -32,7 +30,7 @@ class Metadata:
     datasets: Optional[list[str]] = None
 
     @staticmethod
-    def load(metadata_override_path: Path, model_path: Path) -> Metadata:
+    def load(metadata_override_path: Optional[Path], model_path: Optional[Path], model_name: Optional[str]) -> Metadata:
         # This grabs as many contextual authorship metadata as possible from the model repository
         # making any conversion as required to match the gguf kv store metadata format
         # as well as giving users the ability to override any authorship metadata that may be incorrect
@@ -80,7 +78,7 @@ class Metadata:
         if model_path is not None and model_path.exists():
             metadata.name = model_path.name
 
-        # Metadata Override
+        # Metadata Override File Provided
         # This is based on LLM_KV_NAMES mapping in llama.cpp
         metadata_override = Metadata.load_metadata_override(metadata_override_path)
         metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202
@@ -101,10 +99,14 @@ class Metadata:
         metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202
         metadata.datasets = metadata_override.get(Keys.General.datasets , metadata.datasets ) # noqa: E202
 
+        # Direct Metadata Override (via direct cli argument)
+        if model_name is not None:
+            metadata.name = model_name
+
         return metadata
 
     @staticmethod
-    def load_metadata_override(metadata_override_path: Path):
+    def load_metadata_override(metadata_override_path: Optional[Path]):
         if metadata_override_path is None or not metadata_override_path.exists():
             return {}
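
From a caller's perspective, the load path is now: an optional flat JSON override file keyed by the gguf KV names used above, the model directory whose name is the default, and the direct CLI name, which is applied last and therefore wins. A hedged usage sketch of the new call shape (placeholder paths; "general.name" is assumed to be the string value of Keys.General.NAME; the gguf-py package from this branch is assumed to be importable):

    import json
    from pathlib import Path

    import gguf

    # A minimal --metadata override file that only replaces the model name.
    override_path = Path("overrides.json")
    override_path.write_text(json.dumps({"general.name": "Example-7B-Override"}))

    metadata = gguf.Metadata.load(
        metadata_override_path=override_path,   # values here replace what the model dir provided
        model_path=Path("models/Example-7B"),   # directory name would otherwise be the default
        model_name="Example-7B-Instruct",       # --model-name, applied last: it wins
    )
    print(metadata.name)  # expected: Example-7B-Instruct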
@@ -112,7 +114,7 @@ class Metadata:
             return json.load(f)
 
     @staticmethod
-    def load_model_card(model_path: Path):
+    def load_model_card(model_path: Optional[Path]):
         if model_path is None or not model_path.exists():
             return {}
@@ -125,7 +127,7 @@ class Metadata:
             return frontmatter.load(f)
 
     @staticmethod
-    def load_huggingface_parameters(model_path: Path):
+    def load_huggingface_parameters(model_path: Optional[Path]):
         if model_path is None or not model_path.exists():
             return {}
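
The three loaders changed above (load_metadata_override, load_model_card, load_huggingface_parameters) now share the same defensive shape: the path parameter is Optional and a None or missing path yields an empty result instead of an error. A minimal standalone sketch of that pattern (hypothetical helper name, not the repo's API):

    import json
    from pathlib import Path
    from typing import Any, Optional

    def load_json_if_present(path: Optional[Path]) -> dict[str, Any]:
        # Same guard as in the diff: tolerate None or a missing file and
        # return an empty mapping rather than raising.
        if path is None or not path.exists():
            return {}
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    print(load_json_if_present(None))                  # {}
    print(load_json_if_present(Path("missing.json")))  # {}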