Add more information to GGUFReader and examples comments
This commit is contained in:
parent
2360aaadb4
commit
8e250fe527
3 changed files with 38 additions and 2 deletions
|
@ -8,6 +8,8 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
|||
from gguf import GGUFReader, GGUFValueType # noqa: E402
|
||||
|
||||
|
||||
# For more information about what field.parts and field.data represent,
|
||||
# please see the comments in the modify_gguf.py example.
|
||||
def dump_gguf(filename: str) -> None:
|
||||
print(f'* Loading: {filename}')
|
||||
reader = GGUFReader(filename, 'r')
|
||||
|
|
|
@ -8,12 +8,41 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
|||
from gguf import GGUFReader # noqa: E402
|
||||
|
||||
|
||||
def minimal_example(filename: str) -> None:
    """Set ``tokenizer.ggml.bos_token_id`` to 2 in the GGUF file *filename*.

    The file is opened with mode ``'r+'`` and the new value is assigned
    directly into the part of the field that holds its value. If the file
    has no ``tokenizer.ggml.bos_token_id`` field, nothing is changed.
    """
    reader = GGUFReader(filename, 'r+')
    # Look the field up with get_field() so that a missing key yields None.
    # Indexing a mapping directly (reader.fields[...]) raises KeyError for a
    # missing key, which would make the None check below unreachable.
    field = reader.get_field('tokenizer.ggml.bos_token_id')
    if field is None:
        return
    # field.data lists the indexes (into field.parts) of the parts that hold
    # the field's value; for this scalar field the first entry is the value.
    part_index = field.data[0]
    field.parts[part_index][0] = 2  # Set tokenizer.ggml.bos_token_id to 2
|
||||
#
|
||||
# So what's this field.data thing? It's helpful because field.parts contains
|
||||
# _every_ part of the GGUF field. For example, tokenizer.ggml.bos_token_id consists
|
||||
# of:
|
||||
#
|
||||
# Part index 0: Key length (27)
|
||||
# Part index 1: Key data ("tokenizer.ggml.bos_token_id")
|
||||
# Part index 2: Field type (4, the id for GGUFValueType.UINT32)
|
||||
# Part index 3: Field value
|
||||
#
|
||||
# Note also that each part is an NDArray slice, so even a part that
|
||||
# is only a single value like the key length will be an NDArray of
|
||||
# the key length type (numpy.uint32).
|
||||
#
|
||||
# The .data attribute in the Field is a list of relevant part indexes
|
||||
# and doesn't contain internal GGUF details like the key length part.
|
||||
# In this case, .data will be [3] - just the part index of the
|
||||
# field value itself.
|
||||
|
||||
|
||||
def change_gguf(reader: GGUFReader, key: str, value: str) -> None:
|
||||
field = reader.get_field(key)
|
||||
if field is None:
|
||||
print(f'! Field {repr(key)} not found', file = sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Note that field.types is a list of types. This is because the GGUF
|
||||
# format supports arrays. For example, an array of UINT32 would
|
||||
# look like [GGUFValueType.ARRAY, GGUFValueType.UINT32]
|
||||
handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None
|
||||
if handler is None:
|
||||
print(f'! Field {repr(key)} has unsupported type: {field.types}')
|
||||
|
|
|
@ -1,8 +1,12 @@
|
|||
#
|
||||
# GGUF file reading/modification support. For API usage information,
|
||||
# please see examples/modify_gguf.py and examples/dump_gguf.py
|
||||
#
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from collections import OrderedDict
|
||||
from typing import Any, Dict, Literal, NamedTuple, TypeVar, Union
|
||||
from typing import Any, Literal, NamedTuple, TypeVar, Union
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
|
@ -23,6 +27,7 @@ from gguf.constants import (
|
|||
GGUFValueType,
|
||||
)
|
||||
|
||||
|
||||
# GGUF format versions this reader is meant to accept: version 2 and
# GGUF_VERSION (presumably the current format version - confirm against
# gguf.constants).
READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue