convert-*.py: enable --model-name direct metadata override

This commit is contained in:
brian khuu 2024-06-02 23:56:04 +10:00
parent b1927eed82
commit f7c20793b9
3 changed files with 27 additions and 15 deletions

View file

@ -3624,9 +3624,10 @@ def main() -> None:
else:
logging.basicConfig(level=logging.INFO)
model_name = args.model_name
dir_model = args.model
metadata = gguf.Metadata.load(args.metadata, dir_model)
metadata = gguf.Metadata.load(args.metadata, dir_model, model_name)
if not dir_model.is_dir():
logger.error(f'Error: {args.model} is not a directory')

View file

@ -1195,8 +1195,8 @@ class VocabFactory:
return vocab, special_vocab
def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str:
name = metadata.name if metadata.name is not None else model_name
def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str:
name = metadata.name if metadata.name is not None else None
basename = metadata.basename if metadata.basename is not None else None
finetune = metadata.finetune if metadata.finetune is not None else None
version = metadata.version if metadata.version is not None else None
@ -1210,8 +1210,8 @@ def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_c
return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type)
def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path:
default_filename = default_convention_outfile(file_type, model_name, expert_count, model_params_count, metadata)
def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path:
default_filename = default_convention_outfile(file_type, expert_count, model_params_count, metadata)
ret = model_paths[0].parent / f"{default_filename}.gguf"
if ret in model_paths:
logger.error(
@ -1252,6 +1252,7 @@ def main(args_in: list[str] | None = None) -> None:
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file")
parser.add_argument("--get-outfile", action="store_true", help="get calculated default outfile name")
parser.add_argument("--model-name", type=str, default=None, help="name of the model")
args = parser.parse_args(args_in)
@ -1263,9 +1264,10 @@ def main(args_in: list[str] | None = None) -> None:
else:
logging.basicConfig(level=logging.INFO)
model_name = args.model_name
dir_model = args.model
metadata = gguf.Metadata.load(args.metadata)
metadata = gguf.Metadata.load(args.metadata, dir_model, model_name)
if args.get_outfile:
model_plus = load_some_model(dir_model)
@ -1273,7 +1275,11 @@ def main(args_in: list[str] | None = None) -> None:
model = convert_model_names(model_plus.model, params, args.skip_unknown)
model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts)
ftype = pick_output_type(model, args.outtype)
print(f"{default_convention_outfile(ftype, params.path_model.name, params.n_experts, model_params_count, metadata)}") # noqa: NP100
if metadata.name is None:
metadata.name = params.path_model.name
print(f"{default_convention_outfile(ftype, params.n_experts, model_params_count, metadata)}") # noqa: NP100
return
if args.no_vocab and args.vocab_only:
@ -1354,13 +1360,16 @@ def main(args_in: list[str] | None = None) -> None:
assert params is not None
if metadata.name is None:
metadata.name = params.path_model.name
logger.info(f"Vocab info: {vocab}")
logger.info(f"Special vocab info: {special_vocab}")
model = model_plus.model
model = convert_model_names(model, params, args.skip_unknown)
ftype = pick_output_type(model, args.outtype)
model = convert_to_output_type(model, ftype)
outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata)
outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.n_experts, model_params_count, metadata=metadata)
metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count)

View file

@ -3,10 +3,8 @@ from __future__ import annotations
import json
import frontmatter
from pathlib import Path
from typing import Optional
from dataclasses import dataclass
from .constants import Keys
@ -32,7 +30,7 @@ class Metadata:
datasets: Optional[list[str]] = None
@staticmethod
def load(metadata_override_path: Path, model_path: Path) -> Metadata:
def load(metadata_override_path: Optional[Path], model_path: Optional[Path], model_name: Optional[str]) -> Metadata:
# This grabs as much contextual authorship metadata as possible from the model repository
# making any conversion as required to match the gguf kv store metadata format
# as well as giving users the ability to override any authorship metadata that may be incorrect
@ -80,7 +78,7 @@ class Metadata:
if model_path is not None and model_path.exists():
metadata.name = model_path.name
# Metadata Override
# Metadata Override File Provided
# This is based on LLM_KV_NAMES mapping in llama.cpp
metadata_override = Metadata.load_metadata_override(metadata_override_path)
metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202
@ -101,10 +99,14 @@ class Metadata:
metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202
metadata.datasets = metadata_override.get(Keys.General.datasets , metadata.datasets ) # noqa: E202
# Direct Metadata Override (via direct cli argument)
if model_name is not None:
metadata.name = model_name
return metadata
@staticmethod
def load_metadata_override(metadata_override_path: Path):
def load_metadata_override(metadata_override_path: Optional[Path]):
if metadata_override_path is None or not metadata_override_path.exists():
return {}
@ -112,7 +114,7 @@ class Metadata:
return json.load(f)
@staticmethod
def load_model_card(model_path: Path):
def load_model_card(model_path: Optional[Path]):
if model_path is None or not model_path.exists():
return {}
@ -125,7 +127,7 @@ class Metadata:
return frontmatter.load(f)
@staticmethod
def load_huggingface_parameters(model_path: Path):
def load_huggingface_parameters(model_path: Optional[Path]):
if model_path is None or not model_path.exists():
return {}