gguf-py : move scripts directory (#11116)
* Moved scripts dir and fixed pyproject.toml
* updated readme
* fixed README urls
* bump pypi gguf to v0.14.0
* retrigger ci
* empty commit - trigger ci

parent 1bf839b1e8
commit 8a1d9c25fa
8 changed files with 9 additions and 10 deletions
gguf-py/gguf/scripts/__init__.py (new file, 6 lines)
@@ -0,0 +1,6 @@
# pyright: reportUnusedImport=false

from .gguf_convert_endian import main as gguf_convert_endian_entrypoint
from .gguf_dump import main as gguf_dump_entrypoint
from .gguf_set_metadata import main as gguf_set_metadata_entrypoint
from .gguf_new_metadata import main as gguf_new_metadata_entrypoint

gguf-py/gguf/scripts/gguf_convert_endian.py (new executable file, 134 lines)
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
from __future__ import annotations

import logging
import argparse
import os
import sys
from tqdm import tqdm
from pathlib import Path

import numpy as np

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))

import gguf

logger = logging.getLogger("gguf-convert-endian")


def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
    if np.uint32(1) == np.uint32(1).newbyteorder("<"):
        # Host is little endian
        host_endian = "little"
        swapped_endian = "big"
    else:
        # Sorry PDP or other weird systems that don't use BE or LE.
        host_endian = "big"
        swapped_endian = "little"
    if reader.byte_order == "S":
        file_endian = swapped_endian
    else:
        file_endian = host_endian
    order = host_endian if args.order == "native" else args.order
    logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian")
    if file_endian == order:
        logger.info(f"* File is already {order.upper()} endian. Nothing to do.")
        sys.exit(0)
    logger.info("* Checking tensors for conversion compatibility")
    for tensor in reader.tensors:
        if tensor.tensor_type not in (
            gguf.GGMLQuantizationType.F32,
            gguf.GGMLQuantizationType.F16,
            gguf.GGMLQuantizationType.Q8_0,
        ):
            raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
    logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}")
    if args.dry_run:
        return
    logger.warning("*** Warning *** Warning *** Warning **")
    logger.warning("* This conversion process may damage the file. Ensure you have a backup.")
    if order != host_endian:
        logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.")
    logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted")
    logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:")
    response = input("YES, I am sure> ")
    if response != "YES":
        logger.warning("You didn't enter YES. Okay then, see ya!")
        sys.exit(0)
    logger.info(f"* Converting fields ({len(reader.fields)})")
    for idx, field in enumerate(reader.fields.values()):
        logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}")
        for part in field.parts:
            part.byteswap(inplace=True)
    logger.info(f"* Converting tensors ({len(reader.tensors)})")

    for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")):
        log_message = (
            f"Converting tensor {repr(tensor.name)}, "
            f"type={tensor.tensor_type.name}, "
            f"elements={tensor.n_elements} "
        )

        # Byte-swap each part of the tensor's field
        for part in tensor.field.parts:
            part.byteswap(inplace=True)

        # Byte-swap tensor data if necessary
        if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
            # Handle Q8_0 tensor blocks (block_q8_0)
            # Specific handling of block_q8_0 is required.
            # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.

            block_size = 34  # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>

            n_blocks = len(tensor.data) // block_size
            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
                block_offs = block_num * block_size

                # Byte-Swap f16 sized delta field
                delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
                delta.byteswap(inplace=True)

                # Byte-Swap Q8 weights
                if block_num % 100000 == 0:
                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")

        else:
            # Handle other tensor types
            tensor.data.byteswap(inplace=True)

        pbar.set_description(log_message)

    logger.info("* Completion")


def main() -> None:
    parser = argparse.ArgumentParser(description="Convert GGUF file byte order")
    parser.add_argument(
        "model", type=str,
        help="GGUF format model filename",
    )
    parser.add_argument(
        "order", type=str, choices=['big', 'little', 'native'],
        help="Requested byte order",
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Don't actually change anything",
    )
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")

    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    logger.info(f'* Loading: {args.model}')
    reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+')
    convert_byteorder(reader, args)


if __name__ == "__main__":
    main()
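
The Q8_0 branch above only swaps the 2-byte f16 scale at the start of each 34-byte block, because the remaining 32 int8 quants are single bytes and are unaffected by byte order. An illustrative sketch (an assumption for explanation, not code from this commit) of that byte-level operation on one synthetic block:

import numpy as np

block = np.zeros(34, dtype=np.uint8)  # one block_q8_0: 2-byte f16 scale + 32 int8 quants
block[0:2] = np.frombuffer(np.float16(1.5).tobytes(), dtype=np.uint8)

scale = block[0:2].view(np.uint16)
scale.byteswap(inplace=True)  # flips the f16 scale to the other byte order in place
# block[2:] holds the int8 quants and needs no swapping.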

gguf-py/gguf/scripts/gguf_dump.py (new executable file, 454 lines)
@@ -0,0 +1,454 @@
#!/usr/bin/env python3
from __future__ import annotations

import logging
import argparse
import os
import re
import sys
from pathlib import Path
from typing import Any

import numpy as np

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader, GGUFValueType, ReaderTensor  # noqa: E402

logger = logging.getLogger("gguf-dump")


def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
    host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
    if reader.byte_order == 'S':
        file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
    else:
        file_endian = host_endian
    return (host_endian, file_endian)


# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
    host_endian, file_endian = get_file_host_endian(reader)
    print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.')  # noqa: NP100
    print(f'* Dumping {len(reader.fields)} key/value pair(s)')  # noqa: NP100
    for n, field in enumerate(reader.fields.values(), 1):
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)

        log_message = f'  {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}'
        if len(field.types) == 1:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60]))
            elif field.types[0] in reader.gguf_scalar_to_np:
                log_message += ' = {0}'.format(field.parts[-1][0])
        print(log_message)  # noqa: NP100
    if args.no_tensors:
        return
    print(f'* Dumping {len(reader.tensors)} tensor(s)')  # noqa: NP100
    for n, tensor in enumerate(reader.tensors, 1):
        prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
        print(f'  {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}')  # noqa: NP100


def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
    import json
    host_endian, file_endian = get_file_host_endian(reader)
    metadata: dict[str, Any] = {}
    tensors: dict[str, Any] = {}
    result = {
        "filename": args.model,
        "endian": file_endian,
        "metadata": metadata,
        "tensors": tensors,
    }
    for idx, field in enumerate(reader.fields.values()):
        curr: dict[str, Any] = {
            "index": idx,
            "type": field.types[0].name if field.types else 'UNKNOWN',
            "offset": field.offset,
        }
        metadata[field.name] = curr
        if field.types[:1] == [GGUFValueType.ARRAY]:
            curr["array_types"] = [t.name for t in field.types][1:]
            if not args.json_array:
                continue
            itype = field.types[-1]
            if itype == GGUFValueType.STRING:
                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
            else:
                curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
        elif field.types[0] == GGUFValueType.STRING:
            curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
        else:
            curr["value"] = field.parts[-1].tolist()[0]
    if not args.no_tensors:
        for idx, tensor in enumerate(reader.tensors):
            tensors[tensor.name] = {
                "index": idx,
                "shape": tensor.shape.tolist(),
                "type": tensor.tensor_type.name,
                "offset": tensor.field.offset,
            }
    json.dump(result, sys.stdout)


def markdown_table_with_alignment_support(header_map: list[dict[str, str]], data: list[dict[str, Any]]):
    # JSON to Markdown table formatting: https://stackoverflow.com/a/72983854/2850957

    # Alignment Utility Function
    def strAlign(padding: int, alignMode: str | None, strVal: str):
        if alignMode == 'center':
            return strVal.center(padding)
        elif alignMode == 'right':
            return strVal.rjust(padding - 1) + ' '
        elif alignMode == 'left':
            return ' ' + strVal.ljust(padding - 1)
        else:  # default left
            return ' ' + strVal.ljust(padding - 1)

    def dashAlign(padding: int, alignMode: str | None):
        if alignMode == 'center':
            return ':' + '-' * (padding - 2) + ':'
        elif alignMode == 'right':
            return '-' * (padding - 1) + ':'
        elif alignMode == 'left':
            return ':' + '-' * (padding - 1)
        else:  # default left
            return '-' * (padding)

    # Calculate Padding For Each Column Based On Header and Data Length
    rowsPadding = {}
    for index, columnEntry in enumerate(header_map):
        padCount = max([len(str(v)) for d in data for k, v in d.items() if k == columnEntry['key_name']], default=0) + 2
        headerPadCount = len(columnEntry['header_name']) + 2
        rowsPadding[index] = headerPadCount if padCount <= headerPadCount else padCount

    # Render Markdown Header
    rows = []
    rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(columnEntry['header_name'])) for index, columnEntry in enumerate(header_map)))
    rows.append('|'.join(dashAlign(rowsPadding[index], columnEntry.get('align')) for index, columnEntry in enumerate(header_map)))

    # Render Tabular Data
    for item in data:
        rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(item[columnEntry['key_name']])) for index, columnEntry in enumerate(header_map)))

    # Convert Tabular String Rows Into String
    tableString = ""
    for row in rows:
        tableString += f'|{row}|\n'

    return tableString


def element_count_rounded_notation(count: int) -> str:
    if count > 1e15 :
        # Quadrillion
        scaled_amount = count * 1e-15
        scale_suffix = "Q"
    elif count > 1e12 :
        # Trillions
        scaled_amount = count * 1e-12
        scale_suffix = "T"
    elif count > 1e9 :
        # Billions
        scaled_amount = count * 1e-9
        scale_suffix = "B"
    elif count > 1e6 :
        # Millions
        scaled_amount = count * 1e-6
        scale_suffix = "M"
    elif count > 1e3 :
        # Thousands
        scaled_amount = count * 1e-3
        scale_suffix = "K"
    else:
        # Under Thousands
        scaled_amount = count
        scale_suffix = ""
    return f"{'~' if count > 1e3 else ''}{round(scaled_amount)}{scale_suffix}"


def translate_tensor_name(name):
    words = name.split(".")

    # Source: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#standardized-tensor-names
    abbreviation_dictionary = {
        'token_embd': 'Token embedding',
        'pos_embd': 'Position embedding',
        'output_norm': 'Output normalization',
        'output': 'Output',
        'attn_norm': 'Attention normalization',
        'attn_norm_2': 'Attention normalization',
        'attn_qkv': 'Attention query-key-value',
        'attn_q': 'Attention query',
        'attn_k': 'Attention key',
        'attn_v': 'Attention value',
        'attn_output': 'Attention output',
        'ffn_norm': 'Feed-forward network normalization',
        'ffn_up': 'Feed-forward network "up"',
        'ffn_gate': 'Feed-forward network "gate"',
        'ffn_down': 'Feed-forward network "down"',
        'ffn_gate_inp': 'Expert-routing layer for the Feed-forward network in Mixture of Expert models',
        'ffn_gate_exp': 'Feed-forward network "gate" layer per expert in Mixture of Expert models',
        'ffn_down_exp': 'Feed-forward network "down" layer per expert in Mixture of Expert models',
        'ffn_up_exp': 'Feed-forward network "up" layer per expert in Mixture of Expert models',
        'ssm_in': 'State space model input projections',
        'ssm_conv1d': 'State space model rolling/shift',
        'ssm_x': 'State space model selective parametrization',
        'ssm_a': 'State space model state compression',
        'ssm_d': 'State space model skip connection',
        'ssm_dt': 'State space model time step',
        'ssm_out': 'State space model output projection',
        'blk': 'Block',
        'enc': 'Encoder',
        'dec': 'Decoder',
    }

    expanded_words = []
    for word in words:
        word_norm = word.strip().lower()
        if word_norm in abbreviation_dictionary:
            expanded_words.append(abbreviation_dictionary[word_norm].title())
        else:
            expanded_words.append(word.title())

    return ' '.join(expanded_words)


def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
    host_endian, file_endian = get_file_host_endian(reader)
    markdown_content = ""
    markdown_content += f'# {args.model} - GGUF Internal File Dump\n\n'
    markdown_content += f'- Endian: {file_endian} endian\n'
    markdown_content += '\n'
    markdown_content += '## Key Value Metadata Store\n\n'
    markdown_content += f'There are {len(reader.fields)} key-value pairs in this file\n'
    markdown_content += '\n'

    kv_dump_table: list[dict[str, str | int]] = []
    for n, field in enumerate(reader.fields.values(), 1):
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)

        def escape_markdown_inline_code(value_string):
            # Find the longest contiguous sequence of backticks in the string then
            # wrap string with appropriate number of backticks required to escape it
            max_backticks = max((len(match.group(0)) for match in re.finditer(r'`+', value_string)), default=0)
            inline_code_marker = '`' * (max_backticks + 1)

            # If the string starts or ends with a backtick, add a space at the beginning and end
            if value_string.startswith('`') or value_string.endswith('`'):
                value_string = f" {value_string} "

            return f"{inline_code_marker}{value_string}{inline_code_marker}"

        total_elements = len(field.data)
        value = ""
        if len(field.types) == 1:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                truncate_length = 60
                value_string = str(bytes(field.parts[-1]), encoding='utf-8')
                if len(value_string) > truncate_length:
                    head = escape_markdown_inline_code(value_string[:truncate_length // 2])
                    tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
                    value = "{head}...{tail}".format(head=head, tail=tail)
                else:
                    value = escape_markdown_inline_code(value_string)
            elif curr_type in reader.gguf_scalar_to_np:
                value = str(field.parts[-1][0])
        else:
            if field.types[0] == GGUFValueType.ARRAY:
                curr_type = field.types[1]
                array_elements = []

                if curr_type == GGUFValueType.STRING:
                    render_element = min(5, total_elements)
                    for element_pos in range(render_element):
                        truncate_length = 30
                        value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8')
                        if len(value_string) > truncate_length:
                            head = escape_markdown_inline_code(value_string[:truncate_length // 2])
                            tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
                            value = "{head}...{tail}".format(head=head, tail=tail)
                        else:
                            value = escape_markdown_inline_code(value_string)
                        array_elements.append(value)

                elif curr_type in reader.gguf_scalar_to_np:
                    render_element = min(7, total_elements)
                    for element_pos in range(render_element):
                        array_elements.append(str(field.parts[-1 - (total_elements - element_pos - 1)][0]))

                value = f'[ {", ".join(array_elements).strip()}{", ..." if total_elements > len(array_elements) else ""} ]'

        kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value})

    kv_dump_table_header_map = [
        {'key_name':'n', 'header_name':'POS', 'align':'right'},
        {'key_name':'pretty_type', 'header_name':'TYPE', 'align':'left'},
        {'key_name':'total_elements', 'header_name':'Count', 'align':'right'},
        {'key_name':'field_name', 'header_name':'Key', 'align':'left'},
        {'key_name':'value', 'header_name':'Value', 'align':'left'},
    ]

    markdown_content += markdown_table_with_alignment_support(kv_dump_table_header_map, kv_dump_table)

    markdown_content += "\n"

    if not args.no_tensors:
        # Group tensors by their prefix and maintain order
        tensor_prefix_order: list[str] = []
        tensor_name_to_key: dict[str, int] = {}
        tensor_groups: dict[str, list[ReaderTensor]] = {}
        total_elements = sum(tensor.n_elements for tensor in reader.tensors)

        # Parsing Tensors Record
        for key, tensor in enumerate(reader.tensors):
            tensor_components = tensor.name.split('.')

            # Classify Tensor Group
            tensor_group_name = "base"
            if tensor_components[0] == 'blk':
                tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}"
            elif tensor_components[0] in ['enc', 'dec'] and tensor_components[1] == 'blk':
                tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}.{tensor_components[2]}"
            elif tensor_components[0] in ['enc', 'dec']:
                tensor_group_name = f"{tensor_components[0]}"

            # Check if new Tensor Group
            if tensor_group_name not in tensor_groups:
                tensor_groups[tensor_group_name] = []
                tensor_prefix_order.append(tensor_group_name)

            # Record Tensor and Tensor Position
            tensor_groups[tensor_group_name].append(tensor)
            tensor_name_to_key[tensor.name] = key

        # Tensors Mapping Dump
        markdown_content += f'## Tensors Overview {element_count_rounded_notation(total_elements)} Elements\n\n'
        markdown_content += f'Total number of elements in all tensors: {total_elements} Elements\n'
        markdown_content += '\n'

        for group in tensor_prefix_order:
            tensors = tensor_groups[group]
            group_elements = sum(tensor.n_elements for tensor in tensors)
            markdown_content += f"- [{translate_tensor_name(group)} Tensor Group - {element_count_rounded_notation(group_elements)} Elements](#{group.replace('.', '_')})\n"

        markdown_content += "\n"

        markdown_content += "### Tensor Data Offset\n"
        markdown_content += '\n'
        markdown_content += 'This table contains the offset and data segment relative to start of file\n'
        markdown_content += '\n'

        tensor_mapping_table: list[dict[str, str | int]] = []
        for key, tensor in enumerate(reader.tensors):
            data_offset_pretty = '{0:#16x}'.format(tensor.data_offset)
            data_size_pretty = '{0:#16x}'.format(tensor.n_bytes)
            tensor_mapping_table.append({"t_id":key, "layer_name":tensor.name, "data_offset":data_offset_pretty, "data_size":data_size_pretty})

        tensors_mapping_table_header_map = [
            {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'},
            {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
            {'key_name':'data_offset', 'header_name':'Data Offset (B)', 'align':'right'},
            {'key_name':'data_size', 'header_name':'Data Size (B)', 'align':'right'},
        ]

        markdown_content += markdown_table_with_alignment_support(tensors_mapping_table_header_map, tensor_mapping_table)
        markdown_content += "\n"

        for group in tensor_prefix_order:
            tensors = tensor_groups[group]
            group_elements = sum(tensor.n_elements for tensor in tensors)
            group_percentage = group_elements / total_elements * 100
            markdown_content += f"### <a name=\"{group.replace('.', '_')}\">{translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements</a>\n\n"

            # Precalculate column sizing for visual consistency
            prettify_element_est_count_size: int = 1
            prettify_element_count_size: int = 1
            prettify_dimension_max_widths: dict[int, int] = {}
            for tensor in tensors:
                prettify_element_est_count_size = max(prettify_element_est_count_size, len(str(element_count_rounded_notation(tensor.n_elements))))
                prettify_element_count_size = max(prettify_element_count_size, len(str(tensor.n_elements)))
                for i, dimension_size in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))):
                    prettify_dimension_max_widths[i] = max(prettify_dimension_max_widths.get(i,1), len(str(dimension_size)))

            # Generate Tensor Layer Table Content
            tensor_dump_table: list[dict[str, str | int]] = []
            for tensor in tensors:
                human_friendly_name = translate_tensor_name(tensor.name.replace(".weight", ".(W)").replace(".bias", ".(B)"))
                pretty_dimension = ' x '.join(f'{str(d):>{prettify_dimension_max_widths[i]}}' for i, d in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))))
                element_count_est = f"({element_count_rounded_notation(tensor.n_elements):>{prettify_element_est_count_size}})"
                element_count_string = f"{element_count_est} {tensor.n_elements:>{prettify_element_count_size}}"
                type_name_string = f"{tensor.tensor_type.name}"
                tensor_dump_table.append({"t_id":tensor_name_to_key[tensor.name], "layer_name":tensor.name, "human_layer_name":human_friendly_name, "element_count":element_count_string, "pretty_dimension":pretty_dimension, "tensor_type":type_name_string})

            tensor_dump_table_header_map = [
                {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'},
                {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
                {'key_name':'human_layer_name', 'header_name':'Human Friendly Tensor Layer Name', 'align':'left'},
                {'key_name':'element_count', 'header_name':'Elements', 'align':'left'},
                {'key_name':'pretty_dimension', 'header_name':'Shape', 'align':'left'},
                {'key_name':'tensor_type', 'header_name':'Type', 'align':'left'},
            ]

            markdown_content += markdown_table_with_alignment_support(tensor_dump_table_header_map, tensor_dump_table)

            markdown_content += "\n"
            markdown_content += f"- Total elements in {group}: ({element_count_rounded_notation(group_elements):>4}) {group_elements}\n"
            markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n"
            markdown_content += "\n\n"

    print(markdown_content)  # noqa: NP100


def main() -> None:
    parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
    parser.add_argument("model", type=str, help="GGUF format model filename")
    parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
    parser.add_argument("--json", action="store_true", help="Produce JSON output")
    parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
    parser.add_argument("--data-offset", action="store_true", help="Start of data offset")
    parser.add_argument("--data-alignment", action="store_true", help="Data alignment applied globally to data field")
    parser.add_argument("--markdown", action="store_true", help="Produce markdown output")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")

    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    if not args.json and not args.markdown and not args.data_offset and not args.data_alignment:
        logger.info(f'* Loading: {args.model}')

    reader = GGUFReader(args.model, 'r')

    if args.json:
        dump_metadata_json(reader, args)
    elif args.markdown:
        dump_markdown_metadata(reader, args)
    elif args.data_offset:
        print(reader.data_offset)  # noqa: NP100
    elif args.data_alignment:
        print(reader.alignment)  # noqa: NP100
    else:
        dump_metadata(reader, args)


if __name__ == '__main__':
    main()
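
The dump script is built entirely on the public GGUFReader API it imports (fields, parts, tensors). A hedged usage sketch (not part of the diff) of that same API, with "model.gguf" as a placeholder filename:

from gguf import GGUFReader, GGUFValueType

reader = GGUFReader("model.gguf", 'r')

# Print scalar and string metadata much like dump_metadata() does.
for name, field in reader.fields.items():
    if field.types == [GGUFValueType.STRING]:
        print(name, str(bytes(field.parts[-1]), encoding='utf-8'))
    elif field.types and field.types[0] in reader.gguf_scalar_to_np:
        print(name, field.parts[-1][0])

# Tensor metadata is available without eagerly reading tensor data.
for tensor in reader.tensors:
    print(tensor.name, tensor.tensor_type.name, tensor.shape)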

gguf-py/gguf/scripts/gguf_hash.py (new executable file, 102 lines)
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
from __future__ import annotations

import uuid
import hashlib

import logging
import argparse
import os
import sys
from pathlib import Path

from tqdm import tqdm

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader  # noqa: E402


logger = logging.getLogger("gguf-hash")

# UUID_NAMESPACE_LLAMA_CPP = uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')


# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar: bool, no_layer: bool) -> None:
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    uuidv5_sha1 = hashlib.sha1()
    uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes)

    # Total Weight Calculation For Progress Bar
    total_weights = 0
    for n, tensor in enumerate(reader.tensors, 1):

        # We don't need these
        if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
            continue

        # Calculate Tensor Volume
        sum_weights_in_tensor = 1
        for dim in tensor.shape:
            sum_weights_in_tensor *= dim
        total_weights += sum_weights_in_tensor

    # Hash Progress Bar
    bar = tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar)

    # Hashing Process
    for tensor in reader.tensors:

        # We don't need these
        if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
            continue

        # Progressbar
        sum_weights_in_tensor = 1
        for dim in tensor.shape:
            sum_weights_in_tensor *= dim
        bar.update(sum_weights_in_tensor)

        if not no_layer:

            sha1_layer = hashlib.sha1()
            sha1_layer.update(tensor.data.data)
            print("sha1 {0} {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name))  # noqa: NP100

            sha256_layer = hashlib.sha256()
            sha256_layer.update(tensor.data.data)
            print("sha256 {0} {1}:{2}".format(sha256_layer.hexdigest(), filename, tensor.name))  # noqa: NP100

        sha1.update(tensor.data.data)
        sha256.update(tensor.data.data)
        uuidv5_sha1.update(tensor.data.data)

    # Flush Hash Progress Bar
    bar.close()

    # Display Hash Output
    print("sha1 {0} {1}".format(sha1.hexdigest(), filename))  # noqa: NP100
    print("sha256 {0} {1}".format(sha256.hexdigest(), filename))  # noqa: NP100
    print("uuid {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename))  # noqa: NP100


def main() -> None:
    parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
    parser.add_argument("model", type=str, help="GGUF format model filename")
    parser.add_argument("--no-layer", action="store_true", help="exclude per layer hash")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("--progressbar", action="store_true", help="enable progressbar")
    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    reader = GGUFReader(args.model, 'r')
    gguf_hash(reader, args.model, not args.progressbar, args.no_layer)


if __name__ == '__main__':
    main()
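
Besides plain SHA-1/SHA-256, the script derives a UUIDv5-style identifier by hashing a fixed namespace UUID followed by the tensor payloads. A small sketch of that construction, using placeholder payload bytes instead of real tensor data:

import hashlib
import uuid

UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')

digest = hashlib.sha1()
digest.update(UUID_NAMESPACE_LLAMA_CPP.bytes)
digest.update(b"example payload bytes")  # gguf_hash feeds tensor.data.data here

model_uuid = uuid.UUID(bytes=digest.digest()[:16], version=5)
print(model_uuid)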

gguf-py/gguf/scripts/gguf_new_metadata.py (new executable file, 244 lines)
@@ -0,0 +1,244 @@
#!/usr/bin/env python3
from __future__ import annotations

import logging
import argparse
import os
import sys
import json
from pathlib import Path

import numpy as np
from tqdm import tqdm
from typing import Any, Sequence, NamedTuple

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))

import gguf

logger = logging.getLogger("gguf-new-metadata")


class MetadataDetails(NamedTuple):
    type: gguf.GGUFValueType
    value: Any
    description: str = ''


def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian:
    if np.uint32(1) == np.uint32(1).newbyteorder("<"):
        # Host is little endian
        host_endian = gguf.GGUFEndian.LITTLE
        swapped_endian = gguf.GGUFEndian.BIG
    else:
        # Sorry PDP or other weird systems that don't use BE or LE.
        host_endian = gguf.GGUFEndian.BIG
        swapped_endian = gguf.GGUFEndian.LITTLE

    if reader.byte_order == "S":
        return swapped_endian
    else:
        return host_endian


def decode_field(field: gguf.ReaderField | None) -> Any:
    if field and field.types:
        main_type = field.types[0]

        if main_type == gguf.GGUFValueType.ARRAY:
            sub_type = field.types[-1]

            if sub_type == gguf.GGUFValueType.STRING:
                return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data]
            else:
                return [pv for idx in field.data for pv in field.parts[idx].tolist()]
        if main_type == gguf.GGUFValueType.STRING:
            return str(bytes(field.parts[-1]), encoding='utf-8')
        else:
            return field.parts[-1][0]

    return None


def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
    field = reader.get_field(key)

    return decode_field(field)


def find_token(token_list: Sequence[int], token: str) -> Sequence[int]:
    token_ids = [index for index, value in enumerate(token_list) if value == token]

    if len(token_ids) == 0:
        raise LookupError(f'Unable to find "{token}" in token list!')

    return token_ids


def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: dict[str, MetadataDetails], remove_metadata: Sequence[str]) -> None:
    for field in reader.fields.values():
        # Suppress virtual fields and fields written by GGUFWriter
        if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
            logger.debug(f'Suppressing {field.name}')
            continue

        # Skip old chat templates if we have new ones
        if field.name.startswith(gguf.Keys.Tokenizer.CHAT_TEMPLATE) and gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
            logger.debug(f'Skipping {field.name}')
            continue

        if field.name in remove_metadata:
            logger.debug(f'Removing {field.name}')
            continue

        old_val = MetadataDetails(field.types[0], decode_field(field))
        val = new_metadata.get(field.name, old_val)

        if field.name in new_metadata:
            logger.debug(f'Modifying {field.name}: "{old_val.value}" -> "{val.value}" {val.description}')
            del new_metadata[field.name]
        elif val.value is not None:
            logger.debug(f'Copying {field.name}')

        if val.value is not None:
            writer.add_key_value(field.name, val.value, val.type)

    if gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
        logger.debug('Adding chat template(s)')
        writer.add_chat_template(new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE].value)
        del new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE]

    for key, val in new_metadata.items():
        logger.debug(f'Adding {key}: "{val.value}" {val.description}')
        writer.add_key_value(key, val.value, val.type)

    total_bytes = 0

    for tensor in reader.tensors:
        total_bytes += tensor.n_bytes
        writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)

    bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)

    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_ti_data_to_file()

    for tensor in reader.tensors:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    writer.close()


def main() -> None:
    tokenizer_metadata = (getattr(gguf.Keys.Tokenizer, n) for n in gguf.Keys.Tokenizer.__dict__.keys() if not n.startswith('_'))
    token_names = dict((n.split('.')[-1][:-len('_token_id')], n) for n in tokenizer_metadata if n.endswith('_token_id'))

    parser = argparse.ArgumentParser(description="Make a copy of a GGUF file with new metadata")
    parser.add_argument("input", type=Path, help="GGUF format model input filename")
    parser.add_argument("output", type=Path, help="GGUF format model output filename")
    parser.add_argument("--general-name", type=str, help="The models general.name", metavar='"name"')
    parser.add_argument("--general-description", type=str, help="The models general.description", metavar='"Description ..."')
    parser.add_argument("--chat-template", type=str, help="Chat template string (or JSON string containing templates)", metavar='"{% ... %} ..."')
    parser.add_argument("--chat-template-config", type=Path, help="Config file containing chat template(s)", metavar='tokenizer_config.json')
    parser.add_argument("--pre-tokenizer", type=str, help="The models tokenizer.ggml.pre", metavar='"pre tokenizer"')
    parser.add_argument("--remove-metadata", action="append", type=str, help="Remove metadata (by key name) from output model", metavar='general.url')
    parser.add_argument("--special-token", action="append", type=str, help="Special token by value", nargs=2, metavar=(' | '.join(token_names.keys()), '"<token>"'))
    parser.add_argument("--special-token-by-id", action="append", type=str, help="Special token by id", nargs=2, metavar=(' | '.join(token_names.keys()), '0'))
    parser.add_argument("--force", action="store_true", help="Bypass warnings without confirmation")
    parser.add_argument("--verbose", action="store_true", help="Increase output verbosity")
    args = parser.parse_args(None if len(sys.argv) > 2 else ["--help"])

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    new_metadata = {}
    remove_metadata = args.remove_metadata or []

    if args.general_name:
        new_metadata[gguf.Keys.General.NAME] = MetadataDetails(gguf.GGUFValueType.STRING, args.general_name)

    if args.general_description:
        new_metadata[gguf.Keys.General.DESCRIPTION] = MetadataDetails(gguf.GGUFValueType.STRING, args.general_description)

    if args.chat_template:
        new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, json.loads(args.chat_template) if args.chat_template.startswith('[') else args.chat_template)

    if args.chat_template_config:
        with open(args.chat_template_config, 'r') as fp:
            config = json.load(fp)
            template = config.get('chat_template')
            if template:
                new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)

    if args.pre_tokenizer:
        new_metadata[gguf.Keys.Tokenizer.PRE] = MetadataDetails(gguf.GGUFValueType.STRING, args.pre_tokenizer)

    if remove_metadata:
        logger.warning('*** Warning *** Warning *** Warning **')
        logger.warning('* Most metadata is required for a fully functional GGUF file,')
        logger.warning('* removing crucial metadata may result in a corrupt output file!')

        if not args.force:
            logger.warning('* Enter exactly YES if you are positive you want to proceed:')
            response = input('YES, I am sure> ')
            if response != 'YES':
                logger.info("You didn't enter YES. Okay then, see ya!")
                sys.exit(0)

    logger.info(f'* Loading: {args.input}')
    reader = gguf.GGUFReader(args.input, 'r')

    arch = get_field_data(reader, gguf.Keys.General.ARCHITECTURE)
    endianess = get_byteorder(reader)

    token_list = get_field_data(reader, gguf.Keys.Tokenizer.LIST) or []

    for name, token in args.special_token or []:
        if name not in token_names:
            logger.warning(f'Unknown special token "{name}", ignoring...')
        else:
            ids = find_token(token_list, token)
            new_metadata[token_names[name]] = MetadataDetails(gguf.GGUFValueType.UINT32, ids[0], f'= {token}')

            if len(ids) > 1:
                logger.warning(f'Multiple "{token}" tokens found, choosing ID {ids[0]}, use --special-token-by-id if you want another:')
                logger.warning(', '.join(str(i) for i in ids))

    for name, id_string in args.special_token_by_id or []:
        if name not in token_names:
            logger.warning(f'Unknown special token "{name}", ignoring...')
        elif not id_string.isdecimal():
            raise LookupError(f'Token ID "{id_string}" is not a valid ID!')
        else:
            id_int = int(id_string)

            if id_int >= 0 and id_int < len(token_list):
                new_metadata[token_names[name]] = MetadataDetails(gguf.GGUFValueType.UINT32, id_int, f'= {token_list[id_int]}')
            else:
                raise LookupError(f'Token ID {id_int} is not within token list!')

    if os.path.isfile(args.output) and not args.force:
        logger.warning('*** Warning *** Warning *** Warning **')
        logger.warning(f'* The "{args.output}" GGUF file already exists, it will be overwritten!')
        logger.warning('* Enter exactly YES if you are positive you want to proceed:')
        response = input('YES, I am sure> ')
        if response != 'YES':
            logger.info("You didn't enter YES. Okay then, see ya!")
            sys.exit(0)

    logger.info(f'* Writing: {args.output}')
    writer = gguf.GGUFWriter(args.output, arch=arch, endianess=endianess)

    alignment = get_field_data(reader, gguf.Keys.General.ALIGNMENT)
    if alignment is not None:
        logger.debug(f'Setting custom alignment: {alignment}')
        writer.data_alignment = alignment

    copy_with_new_metadata(reader, writer, new_metadata, remove_metadata)


if __name__ == '__main__':
    main()
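
copy_with_new_metadata() rewrites a GGUF file by replaying every key/value pair and tensor through a GGUFWriter, substituting or dropping entries on the way. A hypothetical sketch (an assumption based on the new module path, not code from this commit) of calling those helpers directly to rename a model; "in.gguf"/"out.gguf" are placeholder paths:

import gguf
from gguf.scripts.gguf_new_metadata import (
    MetadataDetails, copy_with_new_metadata, get_byteorder, get_field_data,
)

reader = gguf.GGUFReader("in.gguf", 'r')
arch = get_field_data(reader, gguf.Keys.General.ARCHITECTURE)
writer = gguf.GGUFWriter("out.gguf", arch=arch, endianess=get_byteorder(reader))

new_metadata = {
    gguf.Keys.General.NAME: MetadataDetails(gguf.GGUFValueType.STRING, "my-renamed-model"),
}
copy_with_new_metadata(reader, writer, new_metadata, remove_metadata=[])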

gguf-py/gguf/scripts/gguf_set_metadata.py (new executable file, 95 lines)
@@ -0,0 +1,95 @@
#!/usr/bin/env python3
import logging
import argparse
import os
import sys
from pathlib import Path

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader  # noqa: E402

logger = logging.getLogger("gguf-set-metadata")


def minimal_example(filename: str) -> None:
    reader = GGUFReader(filename, 'r+')
    field = reader.fields['tokenizer.ggml.bos_token_id']
    if field is None:
        return
    part_index = field.data[0]
    field.parts[part_index][0] = 2  # Set tokenizer.ggml.bos_token_id to 2
    #
    # So what's this field.data thing? It's helpful because field.parts contains
    # _every_ part of the GGUF field. For example, tokenizer.ggml.bos_token_id consists
    # of:
    #
    #  Part index 0: Key length (27)
    #  Part index 1: Key data ("tokenizer.ggml.bos_token_id")
    #  Part index 2: Field type (4, the id for GGUFValueType.UINT32)
    #  Part index 3: Field value
    #
    # Note also that each part is an NDArray slice, so even a part that
    # is only a single value like the key length will be a NDArray of
    # the key length type (numpy.uint32).
    #
    # The .data attribute in the Field is a list of relevant part indexes
    # and doesn't contain internal GGUF details like the key length part.
    # In this case, .data will be [3] - just the part index of the
    # field value itself.


def set_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
    field = reader.get_field(args.key)
    if field is None:
        logger.error(f'! Field {repr(args.key)} not found')
        sys.exit(1)
    # Note that field.types is a list of types. This is because the GGUF
    # format supports arrays. For example, an array of UINT32 would
    # look like [GGUFValueType.ARRAY, GGUFValueType.UINT32]
    handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None
    if handler is None:
        logger.error(f'! This tool only supports changing simple values, {repr(args.key)} has unsupported type {field.types}')
        sys.exit(1)
    current_value = field.parts[field.data[0]][0]
    new_value = handler(args.value)
    logger.info(f'* Preparing to change field {repr(args.key)} from {current_value} to {new_value}')
    if current_value == new_value:
        logger.info(f'- Key {repr(args.key)} already set to requested value {current_value}')
        sys.exit(0)
    if args.dry_run:
        sys.exit(0)
    if not args.force:
        logger.warning('*** Warning *** Warning *** Warning **')
        logger.warning('* Changing fields in a GGUF file can make it unusable. Proceed at your own risk.')
        logger.warning('* Enter exactly YES if you are positive you want to proceed:')
        response = input('YES, I am sure> ')
        if response != 'YES':
            logger.info("You didn't enter YES. Okay then, see ya!")
            sys.exit(0)
    field.parts[field.data[0]][0] = new_value
    logger.info('* Field changed. Successful completion.')


def main() -> None:
    parser = argparse.ArgumentParser(description="Set a simple value in GGUF file metadata")
    parser.add_argument("model", type=str, help="GGUF format model filename")
    parser.add_argument("key", type=str, help="Metadata key to set")
    parser.add_argument("value", type=str, help="Metadata value to set")
    parser.add_argument("--dry-run", action="store_true", help="Don't actually change anything")
    parser.add_argument("--force", action="store_true", help="Change the field without confirmation")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")

    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    logger.info(f'* Loading: {args.model}')
    reader = GGUFReader(args.model, 'r' if args.dry_run else 'r+')
    set_metadata(reader, args)


if __name__ == '__main__':
    main()
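
The long comment in minimal_example() explains how field.parts holds every raw piece of a key/value record while field.data indexes only the value parts. An illustrative, read-only sketch (not from the diff) of inspecting those parts; "model.gguf" is a placeholder path:

from gguf import GGUFReader

reader = GGUFReader("model.gguf", 'r')
field = reader.get_field('tokenizer.ggml.bos_token_id')
if field is not None:
    for i, part in enumerate(field.parts):
        # Parts are numpy slices: key length, key bytes, value type id, then the value.
        print(i, part)
    print("value part indexes:", field.data)  # typically [3] for a simple scalar field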