Add more information to GGUFReader and examples comments

This commit is contained in:
KerfuffleV2 2023-11-09 02:52:42 -07:00
parent 2360aaadb4
commit 8e250fe527
3 changed files with 38 additions and 2 deletions

View file

@ -8,6 +8,8 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
from gguf import GGUFReader, GGUFValueType # noqa: E402
# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
def dump_gguf(filename: str) -> None:
print(f'* Loading: {filename}')
reader = GGUFReader(filename, 'r')

View file

@ -8,12 +8,41 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
from gguf import GGUFReader # noqa: E402
def minimal_example(filename: str) -> None:
    """Set the ``tokenizer.ggml.bos_token_id`` field of a GGUF file to 2.

    The file named by *filename* is opened with ``GGUFReader`` in ``'r+'``
    mode and the field's value part is overwritten in place. If the field
    is not present, the function returns without changing anything.
    """
    reader = GGUFReader(filename, 'r+')
    # Look the field up with get_field() (as change_gguf does) instead of
    # subscripting reader.fields: a subscript lookup on a dict-like mapping
    # raises KeyError for a missing key, so the None check below would
    # never have been reached for an absent field.
    field = reader.get_field('tokenizer.ggml.bos_token_id')
    if field is None:
        return
    part_index = field.data[0]
    field.parts[part_index][0] = 2  # Set tokenizer.ggml.bos_token_id to 2
    #
    # So what's this field.data thing? It's helpful because field.parts contains
    # _every_ part of the GGUF field. For example, tokenizer.ggml.bos_token_id consists
    # of:
    #
    #  Part index 0: Key length (27)
    #  Part index 1: Key data ("tokenizer.ggml.bos_token_id")
    #  Part index 2: Field type (4, the id for GGUFValueType.UINT32)
    #  Part index 3: Field value
    #
    # Note also that each part is an NDArray slice, so even a part that
    # is only a single value like the key length will be an NDArray of
    # the key length type (numpy.uint32).
    #
    # The .data attribute in the Field is a list of relevant part indexes
    # and doesn't contain internal GGUF details like the key length part.
    # In this case, .data will be [3] - just the part index of the
    # field value itself.
def change_gguf(reader: GGUFReader, key: str, value: str) -> None:
field = reader.get_field(key)
if field is None:
print(f'! Field {repr(key)} not found', file = sys.stderr)
sys.exit(1)
# Note that field.types is a list of types. This is because the GGUF
# format supports arrays. For example, an array of UINT32 would
# look like [GGUFValueType.ARRAY, GGUFValueType.UINT32]
handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None
if handler is None:
print(f'! Field {repr(key)} has unsupported type: {field.types}')

View file

@ -1,8 +1,12 @@
#
# GGUF file reading/modification support. For API usage information,
# please see examples/modify_gguf.py and examples/dump_gguf.py
#
from __future__ import annotations
import os
from collections import OrderedDict
from typing import Any, Dict, Literal, NamedTuple, TypeVar, Union
from typing import Any, Literal, NamedTuple, TypeVar, Union
import numpy as np
import numpy.typing as npt
@ -23,6 +27,7 @@ from gguf.constants import (
GGUFValueType,
)
READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]