diff --git a/gguf-py/examples/read_template.py b/gguf-py/examples/read_template.py
new file mode 100644
index 000000000..34a998ae5
--- /dev/null
+++ b/gguf-py/examples/read_template.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+"""Print the chat template stored in a GGUF file's metadata."""
+import sys
+from pathlib import Path
+
+# Put the directory above examples/ on sys.path so the sibling gguf package is importable.
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from gguf.gguf_reader import GGUFReader
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print("Usage: read_template.py GGUF_FILE", file=sys.stderr)
+        sys.exit(1)
+    gguf_file_path = sys.argv[1]
+
+    reader = GGUFReader(gguf_file_path)
+    # get_field returns None for a missing key; report that instead of raising KeyError.
+    field = reader.get_field('tokenizer.chat_template')
+    if field is None:
+        print(f"No 'tokenizer.chat_template' field in {gguf_file_path}", file=sys.stderr)
+        sys.exit(1)
+    print(reader.read_field(field))
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 3500c7613..469759106 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -121,6 +121,32 @@ class GGUFReader:
     # Fetch a key/value metadata field by key.
     def get_field(self, key: str) -> Union[ReaderField, None]:
         return self.fields.get(key, None)
+
+    def read_field(self, field: ReaderField):
+        """Decode a metadata field into plain Python values.
+
+        Returns None when the field has no types, a list for ARRAY fields
+        (UTF-8 decoded strings, or the elements of every referenced part
+        flattened into one list), a str for STRING fields, and otherwise
+        the first element of the field's last part.
+
+        Raises ValueError when the field's type is neither ARRAY, STRING,
+        nor a key of gguf_scalar_to_np.
+        """
+        if not field.types:
+            return None
+        main_type = field.types[0]
+        if main_type == GGUFValueType.ARRAY:
+            # field.data holds the indices into field.parts that carry the values.
+            if field.types[-1] == GGUFValueType.STRING:
+                return [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
+            return [pv for idx in field.data for pv in field.parts[idx].tolist()]
+        if main_type == GGUFValueType.STRING:
+            return str(bytes(field.parts[-1]), encoding="utf-8")
+        # Raise instead of assert: asserts are stripped under `python -O`.
+        if main_type not in self.gguf_scalar_to_np:
+            raise ValueError(f"unsupported type {main_type!r} for field {field.name!r}")
+        return field.parts[-1].tolist()[0]
 
     # Fetch a tensor from the list by index.
     def get_tensor(self, idx: int) -> ReaderTensor:
diff --git a/gguf-py/scripts/gguf-dump.py b/gguf-py/scripts/gguf-dump.py
index dbf891508..1b594ca4b 100755
--- a/gguf-py/scripts/gguf-dump.py
+++ b/gguf-py/scripts/gguf-dump.py
@@ -41,11 +41,7 @@ def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
             pretty_type = str(field.types[-1].name)
         print(f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}', end = '')
         if len(field.types) == 1:
-            curr_type = field.types[0]
-            if curr_type == GGUFValueType.STRING:
-                print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
-            elif field.types[0] in reader.gguf_scalar_to_np:
-                print(' = {0}'.format(field.parts[-1][0]), end = '')
+            print(' = {0}'.format(repr(reader.read_field(field))[:60]), end = '')
         print()
     if args.no_tensors:
         return
@@ -75,17 +71,10 @@ def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
         metadata[field.name] = curr
         if field.types[:1] == [GGUFValueType.ARRAY]:
             curr["array_types"] = [t.name for t in field.types][1:]
             if not args.json_array:
                 continue
-            itype = field.types[-1]
-            if itype == GGUFValueType.STRING:
-                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
-            else:
-                curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
-        elif field.types[0] == GGUFValueType.STRING:
-            curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
-        else:
-            curr["value"] = field.parts[-1].tolist()[0]
+        # Reached for non-array fields, and for arrays only when args.json_array is set.
+        curr["value"] = reader.read_field(field)
     if not args.no_tensors:
         for idx, tensor in enumerate(reader.tensors):
             tensors[tensor.name] = {