From 5ce2dbcf386e5a2667b0d36a0d6d29f59ab6c423 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Thu, 31 Oct 2024 20:19:24 +0800 Subject: [PATCH 01/11] refactor gguf reader --- gguf-py/gguf/gguf_reader.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index e8e61abf8..92119640b 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -87,7 +87,8 @@ class GGUFReader: } def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'): - self.data = np.memmap(path, mode = mode) + self.data = open(path, mode="rb") + self.mmap = np.memmap(path, mode = mode) offs = 0 # Check for GGUF magic @@ -127,7 +128,8 @@ class GGUFReader: if padding != 0: offs += self.alignment - padding self.data_offset = offs - self._build_tensors(offs, tensors_fields) + # self._build_tensors(offs, tensors_fields) + self.data.close() _DT = TypeVar('_DT', bound = npt.DTypeLike) @@ -140,16 +142,22 @@ class GGUFReader: return self.tensors[idx] def _get( - self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, + self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, lazy: bool = False, ) -> npt.NDArray[Any]: count = int(count) - itemsize = int(np.empty([], dtype = dtype).itemsize) - end_offs = offset + itemsize * count - return ( - self.data[offset:end_offs] - .view(dtype = dtype)[:count] - .newbyteorder(override_order or self.byte_order) - ) + itemsize = np.dtype(dtype).itemsize + if not lazy: + self.data.seek(offset) + return ( + np.frombuffer(self.data.read(itemsize * count), dtype = dtype, count = count) + .newbyteorder(override_order or self.byte_order) + ) + else: + return ( + self.mmap[offset:offset + itemsize * count] + .view(dtype = dtype)[:count] + .newbyteorder(override_order or self.byte_order) + ) def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int: if field.name in self.fields: @@ -311,7 +319,7 @@ class GGUFReader: n_elements = n_elems, n_bytes = n_bytes, data_offset = data_offs, - data = self._get(data_offs, item_type, item_count).reshape(np_dims), + data = self._get(data_offs, item_type, item_count, lazy=True).reshape(np_dims), field = field, )) self.tensors = tensors From bcef54e10a14388deb24a2884b829ca7d551321d Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Thu, 31 Oct 2024 22:13:15 +0800 Subject: [PATCH 02/11] improve performance --- gguf-py/gguf/gguf_reader.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 92119640b..3dd004d66 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -6,6 +6,7 @@ from __future__ import annotations import logging import os +import struct from collections import OrderedDict from typing import Any, Literal, NamedTuple, TypeVar, Union @@ -92,7 +93,7 @@ class GGUFReader: offs = 0 # Check for GGUF magic - if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC: + if struct.unpack(" tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: - slen = self._get(offset, np.uint64) - return slen, self._get(offset + 8, np.uint8, slen[0]) + def _get_str(self, offset: int, return_size=False) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: + self.data.seek(offset) + slen = struct.unpack(' Date: Mon, 4 Nov 2024 16:06:46 +0800 Subject: [PATCH 03/11] fix mode --- gguf-py/gguf/gguf_reader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 3dd004d66..fd32a3d1f 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -88,8 +88,9 @@ class GGUFReader: } def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'): - self.data = open(path, mode="rb") - self.mmap = np.memmap(path, mode = mode) + file_mode = "rb" if mode == 'r' else 'rb+' + self.data = open(path, mode=file_mode) + self.mmap = np.memmap(self.data, mode = mode) offs = 0 # Check for GGUF magic @@ -129,7 +130,7 @@ class GGUFReader: if padding != 0: offs += self.alignment - padding self.data_offset = offs - # self._build_tensors(offs, tensors_fields) + self._build_tensors(offs, tensors_fields) self.data.close() _DT = TypeVar('_DT', bound = npt.DTypeLike) From dd320df4b421531fad61609590f9145af9ac9499 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Mon, 4 Nov 2024 16:19:23 +0800 Subject: [PATCH 04/11] fix mode --- gguf-py/gguf/gguf_reader.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index fd32a3d1f..fb51756da 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -89,8 +89,9 @@ class GGUFReader: def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'): file_mode = "rb" if mode == 'r' else 'rb+' + self.mode = mode self.data = open(path, mode=file_mode) - self.mmap = np.memmap(self.data, mode = mode) + self.mmap = np.memmap(path, mode = mode) offs = 0 # Check for GGUF magic @@ -150,10 +151,11 @@ class GGUFReader: itemsize = np.dtype(dtype).itemsize if not lazy: self.data.seek(offset) - return ( + data = ( np.frombuffer(self.data.read(itemsize * count), dtype = dtype, count = count) .newbyteorder(override_order or self.byte_order) ) + return data if self.mode == 'r' else data.copy() else: return ( self.mmap[offset:offset + itemsize * count] From 1dc02150bc56f195dea6b722b02ae507390a4628 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Mon, 4 Nov 2024 16:44:17 +0800 Subject: [PATCH 05/11] optimize offsets calculation --- gguf-py/gguf/gguf_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index fb51756da..754cb501c 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -203,9 +203,9 @@ class GGUFReader: # Handle arrays. if gtype == GGUFValueType.ARRAY: raw_itype = self._get(offs, np.uint32) - offs += int(raw_itype.nbytes) + offs = self.data.tell() alen = self._get(offs, np.uint64) - offs += int(alen.nbytes) + offs = self.data.tell() aparts: list[npt.NDArray[Any]] = [raw_itype, alen] data_idxs: list[int] = [] for idx in range(alen[0]): From a92c920eec5d53a34f718c119d55631488651767 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Tue, 5 Nov 2024 01:03:38 +0800 Subject: [PATCH 06/11] revert unnecessary change --- gguf-py/gguf/gguf_reader.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 754cb501c..b6015d9bb 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -6,6 +6,7 @@ from __future__ import annotations import logging import os +import mmap import struct from collections import OrderedDict from typing import Any, Literal, NamedTuple, TypeVar, Union @@ -91,7 +92,7 @@ class GGUFReader: file_mode = "rb" if mode == 'r' else 'rb+' self.mode = mode self.data = open(path, mode=file_mode) - self.mmap = np.memmap(path, mode = mode) + self.mmap = np.memmap(self.data, mode = mode) offs = 0 # Check for GGUF magic @@ -132,6 +133,8 @@ class GGUFReader: offs += self.alignment - padding self.data_offset = offs self._build_tensors(offs, tensors_fields) + + def __del__(self) -> None: self.data.close() _DT = TypeVar('_DT', bound = npt.DTypeLike) @@ -145,23 +148,17 @@ class GGUFReader: return self.tensors[idx] def _get( - self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, lazy: bool = False, + self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, ) -> npt.NDArray[Any]: count = int(count) itemsize = np.dtype(dtype).itemsize - if not lazy: - self.data.seek(offset) - data = ( - np.frombuffer(self.data.read(itemsize * count), dtype = dtype, count = count) - .newbyteorder(override_order or self.byte_order) - ) - return data if self.mode == 'r' else data.copy() - else: - return ( - self.mmap[offset:offset + itemsize * count] - .view(dtype = dtype)[:count] - .newbyteorder(override_order or self.byte_order) - ) + new_offset = offset + itemsize * count + self.data.seek(new_offset) + return ( + self.mmap[offset:new_offset] + .view(dtype = dtype)[:count] + .newbyteorder(override_order or self.byte_order) + ) def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int: if field.name in self.fields: @@ -328,7 +325,7 @@ class GGUFReader: n_elements = n_elems, n_bytes = n_bytes, data_offset = data_offs, - data = self._get(data_offs, item_type, item_count, lazy=True).reshape(np_dims), + data = self._get(data_offs, item_type, item_count).reshape(np_dims), field = field, )) self.tensors = tensors From ad6fd8de25c6d2b682e7a63d5d93ad7538cf82b1 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Tue, 5 Nov 2024 01:48:31 +0800 Subject: [PATCH 07/11] revert unnecessary change --- gguf-py/gguf/gguf_reader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index b6015d9bb..f0ccc030f 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -152,10 +152,10 @@ class GGUFReader: ) -> npt.NDArray[Any]: count = int(count) itemsize = np.dtype(dtype).itemsize - new_offset = offset + itemsize * count - self.data.seek(new_offset) + end_offs = offset + itemsize * count + self.data.seek(end_offs) return ( - self.mmap[offset:new_offset] + self.mmap[offset:end_offs] .view(dtype = dtype)[:count] .newbyteorder(override_order or self.byte_order) ) From 6a13722ca5c07c0b812bdc872c14949d10544234 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Tue, 5 Nov 2024 12:42:19 +0800 Subject: [PATCH 08/11] code format --- gguf-py/gguf/gguf_reader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index f0ccc030f..ee94ae056 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -6,7 +6,6 @@ from __future__ import annotations import logging import os -import mmap import struct from collections import OrderedDict from typing import Any, Literal, NamedTuple, TypeVar, Union @@ -133,7 +132,7 @@ class GGUFReader: offs += self.alignment - padding self.data_offset = offs self._build_tensors(offs, tensors_fields) - + def __del__(self) -> None: self.data.close() @@ -174,7 +173,7 @@ class GGUFReader: def _get_str(self, offset: int, return_size=False) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: self.data.seek(offset) slen = struct.unpack(' Date: Tue, 5 Nov 2024 14:01:38 +0800 Subject: [PATCH 09/11] make mode compatiable --- gguf-py/gguf/gguf_reader.py | 42 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index ee94ae056..eb1068b7a 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -88,7 +88,7 @@ class GGUFReader: } def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'): - file_mode = "rb" if mode == 'r' else 'rb+' + file_mode = "rb+" if mode == 'r+' else 'rb' self.mode = mode self.data = open(path, mode=file_mode) self.mmap = np.memmap(self.data, mode = mode) @@ -147,17 +147,22 @@ class GGUFReader: return self.tensors[idx] def _get( - self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, + self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, use_mmap: bool = False ) -> npt.NDArray[Any]: count = int(count) itemsize = np.dtype(dtype).itemsize end_offs = offset + itemsize * count - self.data.seek(end_offs) - return ( - self.mmap[offset:end_offs] - .view(dtype = dtype)[:count] - .newbyteorder(override_order or self.byte_order) - ) + if self.mode != "r" or use_mmap: + data = ( + self.mmap[offset:end_offs] + .view(dtype = dtype)[:count] + .newbyteorder(override_order or self.byte_order) + ) + self.data.seek(end_offs) + else: + self.data.seek(offset) + data = np.frombuffer(self.data.read(itemsize * count), dtype = dtype) + return data def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int: if field.name in self.fields: @@ -170,14 +175,15 @@ class GGUFReader: self.fields[field.name] = field return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts) - def _get_str(self, offset: int, return_size=False) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: + def _get_str(self, offset: int) -> list[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: self.data.seek(offset) - slen = struct.unpack(' Date: Tue, 5 Nov 2024 15:15:19 +0800 Subject: [PATCH 10/11] revert --- gguf-py/gguf/gguf_reader.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index eb1068b7a..56332646c 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -150,7 +150,8 @@ class GGUFReader: self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, use_mmap: bool = False ) -> npt.NDArray[Any]: count = int(count) - itemsize = np.dtype(dtype).itemsize + dtype = np.dtype(dtype) + itemsize = dtype.itemsize end_offs = offset + itemsize * count if self.mode != "r" or use_mmap: data = ( @@ -161,6 +162,7 @@ class GGUFReader: self.data.seek(end_offs) else: self.data.seek(offset) + dtype = dtype.newbyteorder(override_order or self.byte_order) data = np.frombuffer(self.data.read(itemsize * count), dtype = dtype) return data @@ -176,13 +178,16 @@ class GGUFReader: return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts) def _get_str(self, offset: int) -> list[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: - self.data.seek(offset) if self.mode != "r": slen = self._get(offset, np.uint64) - sdata = self._get(offset + 8, np.uint8, slen[0]) + sdata = self._get(offset + 8, np.uint8, slen.item()) else: - slen = np.frombuffer(self.data.read(8), dtype = np.uint64) - sdata = np.frombuffer(self.data.read(slen.item()), dtype = np.uint8) + # This is faster to return a read-only str structure with less seek calling. + self.data.seek(offset) + u64 = np.dtype(np.uint64).newbyteorder(self.byte_order) + u8 = np.dtype(np.uint8).newbyteorder(self.byte_order) + slen = np.frombuffer(self.data.read(8), dtype=u64) + sdata = np.frombuffer(self.data.read(slen.item()), dtype=u8) return [slen, sdata] def _get_field_parts( From 94d814c559a1afd77a8c63ffc8d2f4a34ae824e0 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Wed, 4 Dec 2024 15:25:19 +0800 Subject: [PATCH 11/11] fix reader on linux Signed-off-by: isotr0py <2037008807@qq.com> --- gguf-py/gguf/gguf_reader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 56332646c..ed2f5ef3f 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -95,6 +95,7 @@ class GGUFReader: offs = 0 # Check for GGUF magic + self.data.seek(offs) if struct.unpack("