gguf-py : handle more name metadata extraction edge cases
* gguf-py : output the split plan on stdout when using dry_run

* convert_hf : unify vocab naming convention with the standard one

This also adds a way to name LoRA models.
parent 4c9932c1e1
commit 73899f74cf

5 changed files with 78 additions and 47 deletions
convert_hf_to_gguf.py

@@ -355,30 +355,23 @@ class Model:
         if self.metadata.size_label is None and total_params > 0:
             self.metadata.size_label = gguf.size_label(total_params, shared_params, expert_params, expert_count)
 
+        # Extract the encoding scheme from the file type name. e.g. 'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0'
+        output_type: str = self.ftype.name.partition("_")[2]
+
         # Filename Output
+        # Note: `not is_dir()` is used because `.is_file()` will not detect
+        # file template strings as it doesn't actually exist as a file
         if self.fname_out is not None and not self.fname_out.is_dir():
             # Output path is a custom defined templated filename
-            # Note: `not is_dir()` is used because `.is_file()` will not detect
-            # file template strings as it doesn't actually exist as a file
-
-            # Extract the encoding scheme from the file type name. e.g. 'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0'
-            output_type: str = self.ftype.name.partition("_")[2]
-
             # Process templated file name with the output ftype, useful with the "auto" ftype
             self.fname_out = self.fname_out.parent / gguf.fill_templated_filename(self.fname_out.name, output_type)
         else:
-
             # Generate default filename based on model specification and available metadata
-            if vocab_only:
-                # Vocab based default filename
-                fname_default: str = gguf.naming_convention_vocab_only(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version)
+            if not vocab_only:
+                fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type, model_type="LoRA" if total_params < 0 else None)
             else:
-                # Extract the encoding scheme from the file type name. e.g. 'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0'
-                output_type: str = self.ftype.name.partition("_")[2]
-
-                # Standard default filename
-                fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type)
+                fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, size_label=None, output_type=None, model_type="vocab")
 
         # Check if preferred output directory path was provided
         if self.fname_out is not None and self.fname_out.is_dir():
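After this hunk, `output_type` is computed once before the branch, so the encoding is available both for filling a templated output filename and for building the default one, and vocab-only outputs now go through the same `gguf.naming_convention` as regular and LoRA models. A minimal sketch of the templated path (the model name and placeholder value are illustrative, not from this commit):

```python
import gguf

# e.g. `--outfile 'Mistral-7B-{ftype}.gguf'` together with the "auto" ftype:
# the placeholder is only resolvable once the actual output type is known.
output_type = "Q8_0"  # what self.ftype.name.partition("_")[2] yields for MOSTLY_Q8_0
print(gguf.fill_templated_filename("Mistral-7B-{ftype}.gguf", output_type))
```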
gguf-py/gguf/gguf_writer.py

@@ -147,6 +147,10 @@ class GGUFWriter:
         # Hopefully this should work even for variable-expert-count models
         expert_count = (expert_sum // n_expert_tensors) if n_expert_tensors > 0 else 0
 
+        # Negate the total to signal it's likely not exact
+        if last_lora_a is not None:
+            total_params = -total_params
+
         # NOTE: keep the output in the same order as accepted by 'size_label' in gguf-py/gguf/utility.py
         return total_params, shared_params, expert_params, expert_count
 
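The negated total is an in-band flag: once a LoRA "A" tensor has been seen, the parameter count can only be an estimate, and callers can detect that from the sign alone. A sketch of consuming the tuple, assuming the surrounding method is `GGUFWriter.get_total_parameter_count()` (the name is inferred from context, not shown in this diff):

```python
import gguf

writer = gguf.GGUFWriter("Mistral-7B-v0.1-F16.gguf", "llama")
# ... after tensor info has been added ...
total_params, shared_params, expert_params, expert_count = writer.get_total_parameter_count()

is_lora = total_params < 0  # negative total: a LoRA 'A' tensor was seen, count is approximate
# size_label() now applies abs() itself (see utility.py below), so the value passes through as-is:
label = gguf.size_label(total_params, shared_params, expert_params, expert_count)
```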
@@ -181,6 +185,8 @@ class GGUFWriter:
 
         if self.dry_run:
             logger.info("Dry run, not writing files")
+            for name in filenames:
+                print(name)  # noqa: NP100
             exit()
 
         return filenames
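With this change a dry run prints the planned shard filenames, one per line, on stdout, while the notice itself still goes through the logger, so the split plan is easy to capture from a script. A rough usage sketch; the sharding keyword arguments are assumptions based on gguf-py's split support and should be verified against `GGUFWriter.__init__`:

```python
import gguf

# Assumed kwargs: `split_max_tensors` and `dry_run` (check GGUFWriter.__init__).
writer = gguf.GGUFWriter("Mistral-7B-v0.1-F16.gguf", "llama",
                         split_max_tensors=128, dry_run=True)
# ... add metadata and tensor info; the first write_*_to_file() call then
# prints each planned shard filename to stdout and exits without writing anything.
```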
gguf-py/gguf/metadata.py

@@ -186,27 +186,34 @@ class Metadata:
             if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE):
                 name_types[i].add("version")
             # Quant type (should not be there for base models, but still annotated)
-            elif re.fullmatch(r'[iI]?[qQ]\d(_\w)*', part):
+            elif re.fullmatch(r'i?q\d(_\w)*|b?fp?(16|32)', part, re.IGNORECASE):
                 name_types[i].add("type")
                 name_parts[i] = part.upper()
             # Model size
-            elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[kMBT]|small|mini|medium|large|xl)', part, re.IGNORECASE):
+            elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)', part, re.IGNORECASE):
                 part = part.replace("_", ".")
+                # Handle weird bloom-7b1 notation
+                if part[-1].isdecimal():
+                    part = part[:-2] + "." + part[-1] + part[-2]
+                # Normalize the size suffixes
                 if len(part) > 1 and part[-2].isdecimal():
-                    if part[-1] in "mbt":
+                    if part[-1] in "kmbt":
                         part = part[:-1] + part[-1].upper()
-                    elif part[-1] in "k":
-                        part = part[:-1] + part[-1].lower()
-                if total_params > 0:
+                if total_params != 0:
                     try:
-                        label_params = float(part[:-1]) * pow(1000, " kMBT".find(part[-1]))
+                        label_params = float(part[:-1]) * pow(1000, " KMBT".find(part[-1]))
                         # Only use it as a size label if it's close or bigger than the model size
                         # Note that LoRA adapters don't necessarily include all layers,
                         # so this is why bigger label sizes are accepted.
-                        # Do not use the size label when it's smaller than 3/4 of the model size
-                        if total_params - label_params > total_params // 4:
+                        # Do not use the size label when it's smaller than 1/8 of the model size
+                        if (total_params < 0 and label_params < abs(total_params) // 8) or (
+                            # Check both directions when the current model isn't a LoRA adapter
+                            total_params > 0 and abs(label_params - total_params) > 7 * total_params // 8
+                        ):
                             # Likely a context length
                             name_types[i].add("finetune")
+                            # Lowercase the size when it's a context length
+                            part = part[:-1] + part[-1].lower()
                     except ValueError:
                         # Failed to convert the size label to float, use it anyway
                         pass
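Two edge cases drive these changes: float and quant type names such as `F16`, `BF16` or `FP32` are now annotated as types, and size labels may carry a trailing digit (bloom's `7b1`) or an `xxl` class. The new pattern and the `7b1` rewrite can be checked directly:

```python
import re

# The widened size-label pattern from this hunk:
size_re = r'(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)'
for part in ("50k", "8x7B", "1_6B", "7b1", "xxl", "v0.13"):
    print(part, bool(re.fullmatch(size_re, part, re.IGNORECASE)))
# "7b1" only matches thanks to the new trailing [\d]?; "v0.13" still does not.

# The bloom-7b1 rewrite: "7b1" -> "7.1b", upper-cased to "7.1B" further down.
part = "7b1"
if part[-1].isdecimal():
    part = part[:-2] + "." + part[-1] + part[-2]
print(part)  # 7.1b
```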
@@ -214,8 +221,10 @@ class Metadata:
                 name_types[i].add("size_label")
                 name_parts[i] = part
             # Some easy to recognize finetune names
-            elif i > 0 and re.fullmatch(r'chat|instruct|vision', part, re.IGNORECASE):
+            elif i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE):
                 name_types[i].add("finetune")
+                if part.lower() == "lora":
+                    name_parts[i] = "LoRA"
 
         at_start = True
         # Find the basename through the annotated name
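For orientation, `get_model_id_components` returns its pieces in a fixed order (full name component, org component, basename, finetune, version, size label), and a bare `lora` name part is now annotated as a finetune and normalized to `LoRA`. Unpacking one of the new test cases added further down:

```python
import gguf

name, org, basename, finetune, version, size_label = \
    gguf.Metadata.get_model_id_components("UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3")
print(basename, finetune, version, size_label)  # Gemma-2 It-SPPO Iter3 9B
```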
gguf-py/gguf/utility.py

@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from typing import Literal
+
 
 def fill_templated_filename(filename: str, output_type: str | None) -> str:
     # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
@@ -36,15 +38,15 @@ def model_weight_count_rounded_notation(model_params_count: int, min_digits: int
 def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str:
 
     if expert_count > 0:
-        pretty_size = model_weight_count_rounded_notation(shared_params + expert_params, min_digits=2)
+        pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2)
         size_class = f"{expert_count}x{pretty_size}"
     else:
-        size_class = model_weight_count_rounded_notation(total_params, min_digits=2)
+        size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2)
 
     return size_class
 
 
-def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None) -> str:
+def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
 
     if base_name is not None:
@@ -60,23 +62,8 @@ def naming_convention(model_name: str | None, base_name: str | None, finetune_st
 
     version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
 
-    precision = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else ""
+    encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else ""
 
-    return f"{name}{parameters}{finetune}{version}{precision}"
-
-
-def naming_convention_vocab_only(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None) -> str:
-    # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
-
-    if base_name is not None:
-        name = base_name.strip().title().replace(' ', '-').replace('/', '-')
-    elif model_name is not None:
-        name = model_name.strip().title().replace(' ', '-').replace('/', '-')
-    else:
-        name = "ggml-model"
-
-    finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else ""
-
-    version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
-
-    return f"{name}{finetune}{version}-vocab"
+    kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""
+
+    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
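Taken together, `size_label` now tolerates the negated (estimated) totals that `GGUFWriter` produces for LoRA adapters, and the single `naming_convention` helper covers the vocab-only case that previously had its own function. A hedged sketch of the results; the exact strings depend on unchanged parts of `naming_convention` and are stated as expectations, not verified output:

```python
import gguf

# abs() means a negated LoRA total still yields a usable label:
print(gguf.size_label(-6_738_415_616, 0, 0, 0))   # expected: '6.7B'

# One helper for all three output kinds:
print(gguf.naming_convention("Mistral", None, "Instruct", "v0.2", "7B", "Q4_K_M"))
# expected: 'Mistral-7B-Instruct-v0.2-Q4_K_M'
print(gguf.naming_convention("Mistral", None, None, "v0.2", None, None, model_type="vocab"))
# expected: 'Mistral-v0.2-vocab'
print(gguf.naming_convention("Mistral", None, "Instruct", "v0.2", "7B", "F16", model_type="LoRA"))
# expected: 'Mistral-7B-Instruct-v0.2-F16-LoRA'
```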
gguf-py/tests/test_metadata.py

@@ -69,7 +69,7 @@ class TestMetadataMethod(unittest.TestCase):
 
         # There is some legitimate models with only thousands of parameters
         self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k", 50 * 10**3),
-                         ('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50k'))
+                         ('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50K'))
 
         # None standard and not easy to disambiguate
         self.assertEqual(gguf.Metadata.get_model_id_components("DeepSeek-Coder-V2-Lite-Instruct"),
@@ -87,6 +87,42 @@ class TestMetadataMethod(unittest.TestCase):
         self.assertEqual(gguf.Metadata.get_model_id_components("smallcloudai/Refact-1_6B-fim"),
                          ('Refact-1_6B-fim', 'smallcloudai', 'Refact', 'fim', None, '1.6B'))
 
+        # Uses Iter3 for the version
+        self.assertEqual(gguf.Metadata.get_model_id_components("UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3"),
+                         ('Gemma-2-9B-It-SPPO-Iter3', 'UCLA-AGI', 'Gemma-2', 'It-SPPO', 'Iter3', '9B'))
+
+        # Has two potential versions in the basename
+        self.assertEqual(gguf.Metadata.get_model_id_components("NousResearch/Hermes-2-Theta-Llama-3-8B"),
+                         ('Hermes-2-Theta-Llama-3-8B', 'NousResearch', 'Hermes-2-Theta-Llama-3', None, None, '8B'))
+
+        # Potential version in the basename
+        self.assertEqual(gguf.Metadata.get_model_id_components("SeaLLMs/SeaLLMs-v3-7B-Chat"),
+                         ('SeaLLMs-v3-7B-Chat', 'SeaLLMs', 'SeaLLMs-v3', 'Chat', None, '7B'))
+
+        # Underscore in the basename, and 1m for the context size
+        self.assertEqual(gguf.Metadata.get_model_id_components("internlm/internlm2_5-7b-chat-1m", 7 * 10**9),
+                         ('internlm2_5-7b-chat-1m', 'internlm', 'internlm2_5', 'chat-1m', None, '7B'))
+
+        # Version before the finetune name
+        self.assertEqual(gguf.Metadata.get_model_id_components("pszemraj/jamba-900M-v0.13-KIx2"),
+                         ('jamba-900M-v0.13-KIx2', 'pszemraj', 'jamba', 'KIx2', 'v0.13', '900M'))
+
+        # TODO: hf suffix which could be ignored but isn't
+        self.assertEqual(gguf.Metadata.get_model_id_components("state-spaces/mamba-2.8b-hf"),
+                         ('mamba-2.8b-hf', 'state-spaces', 'mamba', 'hf', None, '2.8B'))
+
+        # Two sizes, don't merge them, the other is the number of tokens on which it was trained
+        self.assertEqual(gguf.Metadata.get_model_id_components("abacaj/llama-161M-100B", 161 * 10**6),
+                         ('llama-161M-100B', 'abacaj', 'llama', '100b', None, '161M'))
+
+        # It's a trap, there is no size label
+        self.assertEqual(gguf.Metadata.get_model_id_components("SparseLLM/relu-100B", 1340 * 10**6),
+                         ('relu-100B', 'SparseLLM', 'relu', '100b', None, None))
+
+        # Weird size notation
+        self.assertEqual(gguf.Metadata.get_model_id_components("bigscience/bloom-7b1-petals"),
+                         ('bloom-7b1-petals', 'bigscience', 'bloom', 'petals', None, '7.1B'))
+
     def test_apply_metadata_heuristic_from_model_card(self):
         model_card = {
             'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'],
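Any of the new cases can be spot-checked in isolation; a minimal, self-contained example mirroring the last assertion above (not part of the commit):

```python
import unittest

import gguf


class TestWeirdSizeNotation(unittest.TestCase):
    def test_bloom_7b1(self):
        # "7b1" should be recognized and rewritten to the '7.1B' size label.
        self.assertEqual(
            gguf.Metadata.get_model_id_components("bigscience/bloom-7b1-petals"),
            ('bloom-7b1-petals', 'bigscience', 'bloom', 'petals', None, '7.1B'))


if __name__ == "__main__":
    unittest.main()
```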