convert-*.py: add heuristic to directory name fallback
Also add source_url for huggingface url
This commit is contained in:
parent
39472a09da
commit
3625a42061
5 changed files with 57 additions and 33 deletions
|
@ -146,7 +146,7 @@ class Model:
|
||||||
self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, output_type)
|
self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, output_type)
|
||||||
else:
|
else:
|
||||||
# output in the same directory as the model by default
|
# output in the same directory as the model by default
|
||||||
self.fname_out = dir_model.parent / self.fname_default
|
self.fname_out = dir_model / f"{self.fname_default}.gguf"
|
||||||
|
|
||||||
# Configure GGUF Writer
|
# Configure GGUF Writer
|
||||||
self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
|
self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
|
||||||
|
@ -447,7 +447,7 @@ class Model:
|
||||||
sum_weight_estimate += sum_weights_in_tensor
|
sum_weight_estimate += sum_weights_in_tensor
|
||||||
|
|
||||||
# Calculate weight estimate per model
|
# Calculate weight estimate per model
|
||||||
per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
|
per_model_weight_estimate = (sum_weight_estimate / expert_count) if expert_count is not None and (expert_count > 0) else sum_weight_estimate
|
||||||
|
|
||||||
return per_model_weight_estimate
|
return per_model_weight_estimate
|
||||||
|
|
||||||
|
|
|
@ -806,11 +806,11 @@ class OutputFile:
|
||||||
if metadata.source_hf_repo is not None:
|
if metadata.source_hf_repo is not None:
|
||||||
self.gguf.add_source_hf_repo(metadata.source_hf_repo)
|
self.gguf.add_source_hf_repo(metadata.source_hf_repo)
|
||||||
if metadata.tags is not None:
|
if metadata.tags is not None:
|
||||||
self.gguf_writer.add_tags(metadata.tags)
|
self.gguf.add_tags(metadata.tags)
|
||||||
if metadata.languages is not None:
|
if metadata.languages is not None:
|
||||||
self.gguf_writer.add_languages(metadata.languages)
|
self.gguf.add_languages(metadata.languages)
|
||||||
if metadata.datasets is not None:
|
if metadata.datasets is not None:
|
||||||
self.gguf_writer.add_datasets(metadata.datasets)
|
self.gguf.add_datasets(metadata.datasets)
|
||||||
|
|
||||||
def add_meta_arch(self, params: Params) -> None:
|
def add_meta_arch(self, params: Params) -> None:
|
||||||
# Metadata About The Neural Architecture Itself
|
# Metadata About The Neural Architecture Itself
|
||||||
|
@ -961,6 +961,8 @@ class OutputFile:
|
||||||
|
|
||||||
of = OutputFile(fname_out, endianess=endianess)
|
of = OutputFile(fname_out, endianess=endianess)
|
||||||
|
|
||||||
|
print(metadata)
|
||||||
|
|
||||||
# meta data
|
# meta data
|
||||||
of.add_meta_model(params, metadata)
|
of.add_meta_model(params, metadata)
|
||||||
of.add_meta_arch(params)
|
of.add_meta_arch(params)
|
||||||
|
@ -1017,7 +1019,7 @@ def per_model_weight_count_estimation(tensors: dict[str, LazyTensor], expert_cou
|
||||||
sum_weight_estimate += sum_weights_in_tensor
|
sum_weight_estimate += sum_weights_in_tensor
|
||||||
|
|
||||||
# Calculate weight estimate per model
|
# Calculate weight estimate per model
|
||||||
per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
|
per_model_weight_estimate = (sum_weight_estimate / expert_count) if expert_count is not None and (expert_count > 0) else sum_weight_estimate
|
||||||
|
|
||||||
return per_model_weight_estimate
|
return per_model_weight_estimate
|
||||||
|
|
||||||
|
@ -1302,9 +1304,6 @@ def main(args_in: list[str] | None = None) -> None:
|
||||||
else:
|
else:
|
||||||
model_plus = ModelPlus(model = {}, paths = [dir_model / 'dummy'], format = 'none', vocab = None)
|
model_plus = ModelPlus(model = {}, paths = [dir_model / 'dummy'], format = 'none', vocab = None)
|
||||||
|
|
||||||
model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts)
|
|
||||||
logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})")
|
|
||||||
|
|
||||||
if args.dump:
|
if args.dump:
|
||||||
do_dump_model(model_plus)
|
do_dump_model(model_plus)
|
||||||
return
|
return
|
||||||
|
@ -1370,6 +1369,9 @@ def main(args_in: list[str] | None = None) -> None:
|
||||||
if metadata.name is None:
|
if metadata.name is None:
|
||||||
metadata.name = params.path_model.name
|
metadata.name = params.path_model.name
|
||||||
|
|
||||||
|
model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts)
|
||||||
|
logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})")
|
||||||
|
|
||||||
logger.info(f"Vocab info: {vocab}")
|
logger.info(f"Vocab info: {vocab}")
|
||||||
logger.info(f"Special vocab info: {special_vocab}")
|
logger.info(f"Special vocab info: {special_vocab}")
|
||||||
model = model_plus.model
|
model = model_plus.model
|
||||||
|
|
|
@ -482,13 +482,13 @@ class GGUFWriter:
|
||||||
self.add_string(Keys.General.PARAMETER_WEIGHT_CLASS, parameter_weight_class)
|
self.add_string(Keys.General.PARAMETER_WEIGHT_CLASS, parameter_weight_class)
|
||||||
|
|
||||||
def add_tags(self, tags: Sequence[str]) -> None:
|
def add_tags(self, tags: Sequence[str]) -> None:
|
||||||
self.add_array(Keys.Tokenizer.TAGS, tags)
|
self.add_array(Keys.General.TAGS, tags)
|
||||||
|
|
||||||
def add_languages(self, languages: Sequence[str]) -> None:
|
def add_languages(self, languages: Sequence[str]) -> None:
|
||||||
self.add_array(Keys.Tokenizer.LANGUAGES, languages)
|
self.add_array(Keys.General.LANGUAGES, languages)
|
||||||
|
|
||||||
def add_datasets(self, datasets: Sequence[str]) -> None:
|
def add_datasets(self, datasets: Sequence[str]) -> None:
|
||||||
self.add_array(Keys.Tokenizer.DATASETS, datasets)
|
self.add_array(Keys.General.DATASETS, datasets)
|
||||||
|
|
||||||
def add_name(self, name: str) -> None:
|
def add_name(self, name: str) -> None:
|
||||||
self.add_string(Keys.General.NAME, name)
|
self.add_string(Keys.General.NAME, name)
|
||||||
|
|
|
@ -51,21 +51,28 @@ class Metadata:
|
||||||
metadata.name = model_card.get("model_name")
|
metadata.name = model_card.get("model_name")
|
||||||
|
|
||||||
if "base_model" in model_card:
|
if "base_model" in model_card:
|
||||||
|
# Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
|
||||||
model_id = model_card.get("base_model")
|
model_id = model_card.get("base_model")
|
||||||
model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id)
|
|
||||||
|
|
||||||
if metadata.name is None and model_name_normal is not None:
|
# Check if string. We cannot handle lists as that is too ambiguous
|
||||||
metadata.name = model_name_normal
|
if isinstance(model_id, str):
|
||||||
if metadata.organization is None and organization_name is not None:
|
model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id)
|
||||||
metadata.organization = organization_name
|
if metadata.name is None and model_name_normal is not None:
|
||||||
if metadata.basename is None and base_name is not None:
|
metadata.name = model_name_normal
|
||||||
metadata.basename = base_name
|
if metadata.organization is None and organization_name is not None:
|
||||||
if metadata.finetune is None and fine_tune is not None:
|
metadata.organization = organization_name
|
||||||
metadata.finetune = fine_tune
|
if metadata.basename is None and base_name is not None:
|
||||||
if metadata.version is None and version_string is not None:
|
metadata.basename = base_name
|
||||||
metadata.version = version_string
|
if metadata.finetune is None and fine_tune is not None:
|
||||||
if metadata.parameter_weight_class is None and parameter_weight_class is not None:
|
metadata.finetune = fine_tune
|
||||||
metadata.parameter_weight_class = parameter_weight_class
|
if metadata.version is None and version_string is not None:
|
||||||
|
metadata.version = version_string
|
||||||
|
if metadata.parameter_weight_class is None and parameter_weight_class is not None:
|
||||||
|
metadata.parameter_weight_class = parameter_weight_class
|
||||||
|
if metadata.source_url is None:
|
||||||
|
metadata.source_url = f"https://huggingface.co/{model_id}"
|
||||||
|
if metadata.source_hf_repo is None:
|
||||||
|
metadata.source_hf_repo = model_id
|
||||||
|
|
||||||
if "model-index" in model_card and len(model_card["model_name"]) == 1 and "name" in model_card["model_name"][0]:
|
if "model-index" in model_card and len(model_card["model_name"]) == 1 and "name" in model_card["model_name"][0]:
|
||||||
# This is a model index which has model id that can be extracted into organization and model name
|
# This is a model index which has model id that can be extracted into organization and model name
|
||||||
|
@ -100,11 +107,11 @@ class Metadata:
|
||||||
# non huggingface model card standard but notice some model creator using it
|
# non huggingface model card standard but notice some model creator using it
|
||||||
metadata.author = model_card.get("model_creator")
|
metadata.author = model_card.get("model_creator")
|
||||||
if metadata.tags is None:
|
if metadata.tags is None:
|
||||||
metadata.tags = model_card.get("tags", [])
|
metadata.tags = model_card.get("tags", None)
|
||||||
if metadata.languages is None:
|
if metadata.languages is None:
|
||||||
metadata.languages = model_card.get("language", model_card.get("languages", []))
|
metadata.languages = model_card.get("language", model_card.get("languages", None))
|
||||||
if metadata.datasets is None:
|
if metadata.datasets is None:
|
||||||
metadata.datasets = model_card.get("datasets", model_card.get("dataset", []))
|
metadata.datasets = model_card.get("datasets", model_card.get("dataset", None))
|
||||||
|
|
||||||
# load huggingface parameters if available
|
# load huggingface parameters if available
|
||||||
hf_params = Metadata.load_huggingface_parameters(model_path)
|
hf_params = Metadata.load_huggingface_parameters(model_path)
|
||||||
|
@ -126,14 +133,28 @@ class Metadata:
|
||||||
metadata.version = version_string
|
metadata.version = version_string
|
||||||
if metadata.parameter_weight_class is None and parameter_weight_class is not None:
|
if metadata.parameter_weight_class is None and parameter_weight_class is not None:
|
||||||
metadata.parameter_weight_class = parameter_weight_class
|
metadata.parameter_weight_class = parameter_weight_class
|
||||||
if metadata.source_hf_repo is None and not Metadata.is_model_name_only(hf_name_or_path):
|
if not Metadata.is_model_name_only(hf_name_or_path):
|
||||||
# Can't just have the model name as the source hf repo as a link to the huggingface website needs the org name and the model name
|
# Can't just have the model name as the source hf repo as a link to the huggingface website needs the org name and the model name
|
||||||
metadata.source_hf_repo = "https://huggingface.co/{hf_name_or_path}"
|
if metadata.source_url is None:
|
||||||
|
metadata.source_url = f"https://huggingface.co/{hf_name_or_path}"
|
||||||
|
if metadata.source_hf_repo is None:
|
||||||
|
metadata.source_hf_repo = hf_name_or_path
|
||||||
|
|
||||||
# Use Directory Folder Name As Fallback Name
|
# Use Directory Folder Name As Fallback Name
|
||||||
if metadata.name is None:
|
if model_path is not None and model_path.exists():
|
||||||
if model_path is not None and model_path.exists():
|
model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_path.name)
|
||||||
metadata.name = model_path.name
|
if metadata.name is None and model_name_normal is not None:
|
||||||
|
metadata.name = model_name_normal
|
||||||
|
if metadata.organization is None and organization_name is not None:
|
||||||
|
metadata.organization = organization_name
|
||||||
|
if metadata.basename is None and base_name is not None:
|
||||||
|
metadata.basename = base_name
|
||||||
|
if metadata.finetune is None and fine_tune is not None:
|
||||||
|
metadata.finetune = fine_tune
|
||||||
|
if metadata.version is None and version_string is not None:
|
||||||
|
metadata.version = version_string
|
||||||
|
if metadata.parameter_weight_class is None and parameter_weight_class is not None:
|
||||||
|
metadata.parameter_weight_class = parameter_weight_class
|
||||||
|
|
||||||
# Metadata Override File Provided
|
# Metadata Override File Provided
|
||||||
# This is based on LLM_KV_NAMES mapping in llama.cpp
|
# This is based on LLM_KV_NAMES mapping in llama.cpp
|
||||||
|
|
|
@ -44,6 +44,7 @@ def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str
|
||||||
|
|
||||||
return size_class
|
return size_class
|
||||||
|
|
||||||
|
|
||||||
def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, parameter_weight_class: str, output_type: str) -> str:
|
def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, parameter_weight_class: str, output_type: str) -> str:
|
||||||
# Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
|
# Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue