gguf-py: Refactor and allow reading/modifying existing GGUF files (#3981)

* gguf-py: Refactor and add file reading support

* Replay changes from #3871

Credit to @cebtenzzre for that pull

* Various type annotation fixes.

* sort imports with isort (again)

* Fix missing return statement in add_tensor

* style cleanup with flake8

* fix NamedTuple and Enum usage

* Fix an issue with state init in GGUFReader

Move examples to an examples/ directory

Clean up examples

Add an example of modifying keys in a GGUF file

Update documentation with info on examples

Try to support people importing gguf/gguf.py directly

* Damagage is not a word.

* Clean up gguf-py/examples/modify_gguf.py whitespace

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* Update gguf-py/examples/modify_gguf.py formatting

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* Update gguf-py/gguf/gguf_reader.py type hint

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* Make examples executable, formatting changes

* Add more information to GGUFReader and examples comments

* Include a gguf Python package version bump

* Add convert-gguf-endian.py script

* cleanup

* gguf-py : bump minor version

* Reorganize scripts

* Make GGUFReader endian detection less arbitrary

* Add JSON dumping support to gguf-dump.py

Which I kind of regret now

* A few for gguf-dump.py cleanups

* Murder accidental tuple in gguf-py/scripts/gguf-dump.py

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* cleanup

* constants : remove unneeded type annotations

* fix python 3.8 compat

* Set up gguf- scripts in pyproject.toml

* And include scripts/__init__.py, derp

* convert.py: We can't currently support Q8_0 on big endian.

* gguf-py: SpecialVocab: Always try available sources for special token ids

gguf-py: SpecialVocab: Try to load merges from merges.txt if not in tokenizer.json

gguf-py: SpecialVocab: Add 'add_bos_token' type bools to GGUF metadata
u

* cleanup

* Promote add_X_token to GGUF metadata for BOS and EOS

---------

Co-authored-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
This commit is contained in:
Kerfuffle 2023-11-10 22:04:50 -07:00 committed by GitHub
parent 4a4fd3eefa
commit 34b0a08207
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 1982 additions and 1176 deletions

View file

@ -0,0 +1,12 @@
import os
from importlib import import_module
os.environ["NO_LOCAL_GGUF"] = "TRUE"
gguf_convert_endian_entrypoint = import_module("scripts.gguf-convert-endian").main
gguf_dump_entrypoint = import_module("scripts.gguf-dump").main
gguf_set_metadata_entrypoint = import_module("scripts.gguf-set-metadata").main
del import_module, os

View file

@ -0,0 +1,113 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import sys
from pathlib import Path
import numpy as np
# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent))
import gguf
def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
# Host is little endian
host_endian = "little"
swapped_endian = "big"
else:
# Sorry PDP or other weird systems that don't use BE or LE.
host_endian = "big"
swapped_endian = "little"
if reader.byte_order == "S":
file_endian = swapped_endian
else:
file_endian = host_endian
if args.order == "native":
order = host_endian
print(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian")
if file_endian == order:
print(f"* File is already {order.upper()} endian. Nothing to do.")
sys.exit(0)
print("* Checking tensors for conversion compatibility")
for tensor in reader.tensors:
if tensor.tensor_type not in (
gguf.GGMLQuantizationType.F32,
gguf.GGMLQuantizationType.F16,
gguf.GGMLQuantizationType.Q8_0,
):
raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
print(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}")
if args.dry_run:
return
print("\n*** Warning *** Warning *** Warning **")
print("* This conversion process may damage the file. Ensure you have a backup.")
if order != host_endian:
print("* Requested endian differs from host, you will not be able to load the model on this machine.")
print("* The file will be modified immediately, so if conversion fails or is interrupted")
print("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:")
response = input("YES, I am sure> ")
if response != "YES":
print("You didn't enter YES. Okay then, see ya!")
sys.exit(0)
print(f"\n* Converting fields ({len(reader.fields)})")
for idx, field in enumerate(reader.fields.values()):
print(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}")
for part in field.parts:
part.byteswap(inplace=True)
print(f"\n* Converting tensors ({len(reader.tensors)})")
for idx, tensor in enumerate(reader.tensors):
print(
f" - {idx:4}: Converting tensor {repr(tensor.name)}, type={tensor.tensor_type.name}, "
f"elements={tensor.n_elements}... ",
end="",
)
tensor_type = tensor.tensor_type
for part in tensor.field.parts:
part.byteswap(inplace=True)
if tensor_type != gguf.GGMLQuantizationType.Q8_0:
tensor.data.byteswap(inplace=True)
print()
continue
# A Q8_0 block consists of a f16 delta followed by 32 int8 quants, so 34 bytes
block_size = 34
n_blocks = len(tensor.data) // block_size
for block_num in range(n_blocks):
block_offs = block_num * block_size
# I know I said f16, but it doesn't matter here - any simple 16 bit type works.
delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
delta.byteswap(inplace=True)
if block_num % 100000 == 0:
print(f"[{(n_blocks - block_num) // 1000}K]", end="")
sys.stdout.flush()
print()
print("* Completion")
def main() -> None:
parser = argparse.ArgumentParser(description="Convert GGUF file byte order")
parser.add_argument(
"model", type=str,
help="GGUF format model filename",
)
parser.add_argument(
"order", type=str, choices=['big', 'little', 'native'],
help="Requested byte order",
)
parser.add_argument(
"--dry-run", action="store_true",
help="Don't actually change anything",
)
args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
print(f'* Loading: {args.model}')
reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+')
convert_byteorder(reader, args)
if __name__ == "__main__":
main()

116
gguf-py/scripts/gguf-dump.py Executable file
View file

@ -0,0 +1,116 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import sys
from pathlib import Path
from typing import Any
import numpy as np
# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent))
from gguf import GGUFReader, GGUFValueType # noqa: E402
def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
if reader.byte_order == 'S':
file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
else:
file_endian = host_endian
return (host_endian, file_endian)
# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
host_endian, file_endian = get_file_host_endian(reader)
print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.')
print(f'\n* Dumping {len(reader.fields)} key/value pair(s)')
for n, field in enumerate(reader.fields.values(), 1):
if not field.types:
pretty_type = 'N/A'
elif field.types[0] == GGUFValueType.ARRAY:
nest_count = len(field.types) - 1
pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
else:
pretty_type = str(field.types[-1].name)
print(f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}', end = '')
if len(field.types) == 1:
curr_type = field.types[0]
if curr_type == GGUFValueType.STRING:
print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
elif field.types[0] in reader.gguf_scalar_to_np:
print(' = {0}'.format(field.parts[-1][0]), end = '')
print()
if args.no_tensors:
return
print(f'\n* Dumping {len(reader.tensors)} tensor(s)')
for n, tensor in enumerate(reader.tensors, 1):
prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
print(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}')
def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
import json
host_endian, file_endian = get_file_host_endian(reader)
metadata: dict[str, Any] = {}
tensors: dict[str, Any] = {}
result = {
"filename": args.model,
"endian": file_endian,
"metadata": metadata,
"tensors": tensors,
}
for idx, field in enumerate(reader.fields.values()):
curr: dict[str, Any] = {
"index": idx,
"type": field.types[0].name if field.types else 'UNKNOWN',
"offset": field.offset,
}
metadata[field.name] = curr
if field.types[:1] == [GGUFValueType.ARRAY]:
curr["array_types"] = [t.name for t in field.types][1:]
if not args.json_array:
continue
itype = field.types[-1]
if itype == GGUFValueType.STRING:
curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
else:
curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
elif field.types[0] == GGUFValueType.STRING:
curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
else:
curr["value"] = field.parts[-1].tolist()[0]
for idx, tensor in enumerate(reader.tensors):
tensors[tensor.name] = {
"index": idx,
"shape": tensor.shape.tolist(),
"type": tensor.tensor_type.name,
"offset": tensor.field.offset,
}
json.dump(result, sys.stdout)
def main() -> None:
parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
parser.add_argument("model", type=str, help="GGUF format model filename")
parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
parser.add_argument("--json", action="store_true", help="Produce JSON output")
parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
if not args.json:
print(f'* Loading: {args.model}')
reader = GGUFReader(args.model, 'r')
if args.json:
dump_metadata_json(reader, args)
else:
dump_metadata(reader, args)
if __name__ == '__main__':
main()

View file

@ -0,0 +1,90 @@
#!/usr/bin/env python3
import argparse
import os
import sys
from pathlib import Path
# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent))
from gguf import GGUFReader # noqa: E402
def minimal_example(filename: str) -> None:
reader = GGUFReader(filename, 'r+')
field = reader.fields['tokenizer.ggml.bos_token_id']
if field is None:
return
part_index = field.data[0]
field.parts[part_index][0] = 2 # Set tokenizer.ggml.bos_token_id to 2
#
# So what's this field.data thing? It's helpful because field.parts contains
# _every_ part of the GGUF field. For example, tokenizer.ggml.bos_token_id consists
# of:
#
# Part index 0: Key length (27)
# Part index 1: Key data ("tokenizer.ggml.bos_token_id")
# Part index 2: Field type (4, the id for GGUFValueType.UINT32)
# Part index 3: Field value
#
# Note also that each part is an NDArray slice, so even a part that
# is only a single value like the key length will be a NDArray of
# the key length type (numpy.uint32).
#
# The .data attribute in the Field is a list of relevant part indexes
# and doesn't contain internal GGUF details like the key length part.
# In this case, .data will be [3] - just the part index of the
# field value itself.
def set_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
field = reader.get_field(args.key)
if field is None:
print(f'! Field {repr(args.key)} not found', file = sys.stderr)
sys.exit(1)
# Note that field.types is a list of types. This is because the GGUF
# format supports arrays. For example, an array of UINT32 would
# look like [GGUFValueType.ARRAY, GGUFValueType.UINT32]
handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None
if handler is None:
print(
f'! This tool only supports changing simple values, {repr(args.key)} has unsupported type {field.types}',
file = sys.stderr,
)
sys.exit(1)
current_value = field.parts[field.data[0]][0]
new_value = handler(args.value)
print(f'* Preparing to change field {repr(args.key)} from {current_value} to {new_value}')
if current_value == new_value:
print(f'- Key {repr(args.key)} already set to requested value {current_value}')
sys.exit(0)
if args.dry_run:
sys.exit(0)
if not args.force:
print('*** Warning *** Warning *** Warning **')
print('* Changing fields in a GGUF file can make it unusable. Proceed at your own risk.')
print('* Enter exactly YES if you are positive you want to proceed:')
response = input('YES, I am sure> ')
if response != 'YES':
print("You didn't enter YES. Okay then, see ya!")
sys.exit(0)
field.parts[field.data[0]][0] = new_value
print('* Field changed. Successful completion.')
def main() -> None:
parser = argparse.ArgumentParser(description="Set a simple value in GGUF file metadata")
parser.add_argument("model", type=str, help="GGUF format model filename")
parser.add_argument("key", type=str, help="Metadata key to set")
parser.add_argument("value", type=str, help="Metadata value to set")
parser.add_argument("--dry-run", action="store_true", help="Don't actually change anything")
parser.add_argument("--force", action="store_true", help="Change the field without confirmation")
args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
print(f'* Loading: {args.model}')
reader = GGUFReader(args.model, 'r' if args.dry_run else 'r+')
set_metadata(reader, args)
if __name__ == '__main__':
main()