Replay changes from #3871

Credit to @cebtenzzre for that pull
Author: KerfuffleV2
Date:   2023-11-07 15:01:36 -07:00
parent b8c80df741
commit 8047aa192f
2 changed files with 54 additions and 31 deletions

gguf-py/gguf/gguf_writer.py

@@ -5,7 +5,8 @@ import shutil
 import struct
 import tempfile
 from io import BufferedWriter
-from typing import Any, BinaryIO, Sequence
+from enum import Enum, auto
+from typing import Any, IO, Sequence
 
 import numpy as np
@@ -21,18 +22,16 @@ from .constants import (
     TokenType,
 )
 
+class WriterState(Enum):
+    EMPTY = auto()
+    HEADER = auto()
+    KV_DATA = auto()
+    TI_DATA = auto()
+
 class GGUFWriter:
     fout: BufferedWriter
-    arch: str
-    offset_tensor = 0
-    data_alignment = GGUF_DEFAULT_ALIGNMENT
-    kv_data = b""
-    kv_data_count = 0
-    ti_data = b""
-    ti_data_count = 0
-    use_temp_file: bool
-    temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
-    tensors: list[tuple[np.ndarray[Any, Any], int]]
+    temp_file: tempfile.SpooledTemporaryFile[bytes] | None
+    tensors: list[np.ndarray[Any, Any]]
 
     _simple_value_packing = {
         GGUFValueType.UINT8: "B",
         GGUFValueType.INT8: "b",
@@ -60,27 +59,47 @@ class GGUFWriter:
         self.fout = open(path, "wb")
         self.arch = arch
         self.endianess = endianess
-        self.add_architecture()
+        self.offset_tensor = 0
+        self.data_alignment = GGUF_DEFAULT_ALIGNMENT
+        self.kv_data = b""
+        self.kv_data_count = 0
+        self.ti_data = b""
+        self.ti_data_count = 0
         self.use_temp_file = use_temp_file
+        self.temp_file = None
         self.tensors = []
         print("gguf: This GGUF file is for {0} Endian only"
               .format("Big" if self.endianess == GGUFEndian.BIG else "Little"))
+        self.state = WriterState.EMPTY
+
+        self.add_architecture()
 
     def write_header_to_file(self) -> None:
+        if self.state is not WriterState.EMPTY:
+            raise ValueError(f'Expected output file to be empty, got {self.state}')
+
         self._write_packed("<I", GGUF_MAGIC, skip_pack_prefix = True)
         self._write_packed("I", GGUF_VERSION)
         self._write_packed("Q", self.ti_data_count)
         self._write_packed("Q", self.kv_data_count)
         self.flush()
-        # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
+        self.state = WriterState.HEADER
 
     def write_kv_data_to_file(self) -> None:
+        if self.state is not WriterState.HEADER:
+            raise ValueError(f'Expected output file to contain the header, got {self.state}')
+
         self.fout.write(self.kv_data)
         self.flush()
+        self.state = WriterState.KV_DATA
 
     def write_ti_data_to_file(self) -> None:
+        if self.state is not WriterState.KV_DATA:
+            raise ValueError(f'Expected output file to contain KV data, got {self.state}')
+
         self.fout.write(self.ti_data)
         self.flush()
+        self.state = WriterState.TI_DATA
 
     def add_key(self, key: str) -> None:
         self.add_val(key, GGUFValueType.STRING, add_vtype=False)
@@ -173,6 +192,9 @@ class GGUFWriter:
         return ((x + n - 1) // n) * n
 
     def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32], tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None) -> None:
+        if self.state is not WriterState.EMPTY:
+            raise ValueError(f'Expected output file to be empty, got {self.state}')
+
         if raw_dtype is None and tensor_dtype not in (np.float32, np.float16):
             raise ValueError("Only F32 and F16 tensors are supported for now")
@@ -203,23 +225,21 @@ class GGUFWriter:
         shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
         self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
 
-        pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes
-
         if self.temp_file is None:
-            self.tensors.append((tensor, pad))
+            self.tensors.append(tensor)
             return
 
         tensor.tofile(self.temp_file)
+        self.write_padding(self.temp_file, tensor.nbytes)
 
-        if pad != 0:
-            self.temp_file.write(bytes([0] * pad))
-
-    def write_padding(self, fp: BinaryIO, n: int, align: int | None = None) -> None:
+    def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
         pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
         if pad != 0:
             fp.write(bytes([0] * pad))
 
     def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
+        if self.state is not WriterState.TI_DATA:
+            raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
+
         if self.endianess==GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
@@ -232,10 +252,13 @@ class GGUFWriter:
         self.write_padding(self.fout, self.fout.tell())
 
         if self.temp_file is None:
-            for (currtensor, currpad) in self.tensors:
-                currtensor.tofile(self.fout)
-                if currpad != 0:
-                    self.fout.write(bytes([0] * currpad))
+            while True:
+                try:
+                    tensor = self.tensors.pop(0)
+                except IndexError:
+                    break
+                tensor.tofile(self.fout)
+                self.write_padding(self.fout, tensor.nbytes)
             return
 
         self.temp_file.seek(0)
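
Taken together, the gguf_writer.py changes turn GGUFWriter into a small state machine: tensor info may only be added while the file is EMPTY, and the write_* methods must run in the order header, KV data, tensor info, tensor data, each advancing self.state. A minimal usage sketch, assuming the surrounding gguf-py API that this diff does not show (write_tensors_to_file, close) and a made-up output path and tensor:

import numpy as np
from gguf import GGUFWriter

w = GGUFWriter("example.gguf", arch="llama")                 # state: EMPTY
w.add_tensor("weight", np.zeros((4, 4), dtype=np.float32))   # add_tensor_info requires EMPTY

w.write_header_to_file()    # EMPTY   -> HEADER
w.write_kv_data_to_file()   # HEADER  -> KV_DATA
w.write_ti_data_to_file()   # KV_DATA -> TI_DATA
w.write_tensors_to_file()   # write_tensor_data requires TI_DATA
w.close()

# Out-of-order calls now fail fast instead of silently writing a corrupt file:
#     GGUFWriter("bad.gguf", arch="llama").write_kv_data_to_file()
# raises: ValueError: Expected output file to contain the header, got WriterState.EMPTY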

gguf-py/gguf/vocab.py

@@ -9,11 +9,8 @@ from typing import Any, Callable
 from .gguf_writer import GGUFWriter
 
 class SpecialVocab:
-    load_merges: bool = False
-    merges: list[str] = []
-    special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
-    special_token_ids: dict[str, int] = {}
-    n_vocab: int | None = None
+    merges: list[str]
+    special_token_ids: dict[str, int]
 
     def __init__(
         self, path: str | os.PathLike[str], load_merges: bool = False,
@@ -23,8 +20,11 @@ class SpecialVocab:
         self.special_token_ids = {}
         self.n_vocab = n_vocab
         self.load_merges = load_merges
+        self.merges = []
         if special_token_types is not None:
             self.special_token_types = special_token_types
+        else:
+            self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
         self._load(Path(path))
 
     def _load(self, path: Path) -> None:
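
The vocab.py half applies the same cleanup to SpecialVocab: the mutable class-level defaults (merges, special_token_ids, the special_token_types fallback) move into __init__. Class-level mutable defaults are shared by every instance, so one instance's mutations leak into all the others; a quick illustration of the pitfall, with invented class names:

class Shared:
    merges: list[str] = []      # one list object shared by every instance

class PerInstance:
    merges: list[str]           # annotation only, as in the new SpecialVocab

    def __init__(self) -> None:
        self.merges = []        # fresh list per instance

a, b = Shared(), Shared()
a.merges.append("t h")
print(b.merges)                 # ['t h'] -- b sees a's mutation

c, d = PerInstance(), PerInstance()
c.merges.append("t h")
print(d.merges)                 # [] -- isolated, as intended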