diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index b89f8ff2d..b1806e244 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -59,7 +59,7 @@ class Model:
     tensor_map: gguf.TensorNameMap
     tensor_names: set[str] | None
     fname_out: Path
-    gguf_writer: gguf.GGUFWriterSplit
+    gguf_writer: gguf.GGUFWriter
 
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
@@ -95,8 +95,8 @@ class Model:
         ftype_lw: str = ftype_up.lower()
         # allow templating the file name with the output ftype, useful with the "auto" ftype
         self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
-        self.gguf_writer = gguf.GGUFWriterSplit(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], split_arguments,
-                                                endianess=self.endianess, use_temp_file=self.use_temp_file)
+        self.gguf_writer = gguf.GGUFWriter(None, gguf.MODEL_ARCH_NAMES[self.model_arch], split_arguments,
+                                           endianess=self.endianess, use_temp_file=self.use_temp_file)
 
     @classmethod
     def __init_subclass__(cls):
@@ -326,16 +326,14 @@ class Model:
 
     def write(self):
         self.write_tensors()
-        self.gguf_writer.init_shards()
         self.gguf_writer.write_header_to_file(self.fname_out)
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.write_tensors_to_file(progress=True)
         self.gguf_writer.close()
 
     def write_vocab(self):
-        if self.gguf_writer.split_arguments.split:
+        if self.gguf_writer.split_arguments.split_style != gguf.SplitStyle.NONE:
             raise ValueError('Splitting the vocabulary is not supported')
-        self.gguf_writer.init_shards()
         self.gguf_writer.write_header_to_file(self.fname_out)
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.close()
diff --git a/gguf-py/gguf/__init__.py b/gguf-py/gguf/__init__.py
index a2197255a..ea5146b16 100644
--- a/gguf-py/gguf/__init__.py
+++ b/gguf-py/gguf/__init__.py
@@ -2,7 +2,6 @@ from .constants import *
 from .lazy import *
 from .gguf_reader import *
 from .gguf_writer import *
-from .gguf_writer_split import *
 from .quants import *
 from .tensor_mapping import *
 from .vocab import *
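
Usage note, not part of the patch: with this change the converter builds a single GGUFWriter up front, and the separate GGUFWriterSplit/init_shards step disappears. A minimal sketch of the resulting flow, assuming the sharding flags convert-hf-to-gguf.py already defines (--split-max-tensors, --split-max-size, --dry-run, --no-tensor-first-split); the architecture string, tensor names, and shapes below are illustrative placeholders:

    from argparse import Namespace

    import numpy as np
    import gguf

    # hypothetical flag values; SplitArguments derives the SplitStyle from them
    args = Namespace(split_max_tensors=128, split_max_size=None,
                     dry_run=False, no_tensor_first_split=False)
    split_args = gguf.SplitArguments(args)  # split_style == SplitStyle.TENSORS

    # path is None at construction; the real path goes to write_header_to_file(),
    # mirroring Model.write() above
    writer = gguf.GGUFWriter(None, "llama", split_args)
    for i in range(300):
        writer.add_tensor(f"blk.{i}.ffn_up.weight", np.zeros((4, 4), dtype=np.float32))

    # 300 tensors against a 128-tensor limit bucket into three shards, named
    # model-00001-of-00003.gguf etc. per SHARD_NAME_FORMAT
    writer.write_header_to_file("model.gguf")
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file(progress=True)
    writer.close()

The shard buckets are built incrementally as tensors are added; the shard file names are only resolved once writing starts.
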
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 4413e9010..84190837d 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -5,10 +5,13 @@ import os
 import shutil
 import struct
 import tempfile
+from argparse import Namespace
+from collections import deque
 from dataclasses import dataclass
 from enum import Enum, auto
+from pathlib import Path
 from io import BufferedWriter
-from typing import IO, Any, Sequence, Mapping
+from typing import IO, Any, Sequence, Mapping, TypeAlias
 from string import ascii_letters, digits
 
 import numpy as np
@@ -27,10 +30,19 @@ from .constants import (
 )
 
 from .quants import quant_shape_from_byte_shape
+from .constants import Keys
 
 logger = logging.getLogger(__name__)
 
+SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"
+NUM_SHARD_KV_DATA = 6
+METADATA_ONLY_INDICATOR = -1
+
+KVTempData: TypeAlias = dict[str, tuple[Any, GGUFValueType | None]]  # {key: (value, type)}
+TensorTempData: TypeAlias = tuple[str, np.ndarray[Any, Any], GGMLQuantizationType | None]  # (tensor name, tensor data, tensor dtype)
+
 
 @dataclass
 class TensorInfo:
@@ -45,6 +57,25 @@ class GGUFValue:
     value: Any
     type: GGUFValueType
 
 
+@dataclass
+class Shard:
+    path: Path
+    tensor_count: int
+    size: int
+    tensors: deque[TensorTempData]
+
+
+class SplitArguments:
+    def __init__(self, args: Namespace) -> None:
+        self.split_max_tensors = args.split_max_tensors if args.split_max_tensors else 0
+        self.split_max_size = GGUFWriter.split_str_to_n_bytes(args.split_max_size) if args.split_max_size else 0
+        self.split_style = SplitStyle.TENSORS if self.split_max_tensors \
+            else SplitStyle.SIZE if self.split_max_size \
+            else SplitStyle.NONE
+        self.dry_run = args.dry_run
+        self.small_first_shard = args.no_tensor_first_split
+
+
 class WriterState(Enum):
     NO_FILE = auto()
     EMPTY = auto()
@@ -54,11 +85,17 @@ class WriterState(Enum):
     WEIGHTS = auto()
 
 
+class SplitStyle(Enum):
+    NONE = auto()
+    TENSORS = auto()
+    SIZE = auto()
+
+
 class GGUFWriter:
-    fout: BufferedWriter | None
+    fout: list[BufferedWriter | None]
     path: os.PathLike[str] | str | None
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None
-    tensors: dict[str, TensorInfo]
+    tensors: list[dict[str, TensorInfo]]
     kv_data: dict[str, GGUFValue]
     state: WriterState
     _simple_value_packing = {
@@ -76,25 +113,55 @@ class GGUFWriter:
     }
 
     def __init__(
-        self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False,
-        endianess: GGUFEndian = GGUFEndian.LITTLE, add_architecture: bool = True
+        self, path: os.PathLike[str] | str | None, arch: str, split_arguments: SplitArguments,
+        use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE
     ):
-        self.fout = None
+        self.fout = []
         self.path = path
         self.arch = arch
         self.endianess = endianess
         self.data_alignment = GGUF_DEFAULT_ALIGNMENT
+        self.split_arguments = split_arguments
         self.use_temp_file = use_temp_file
         self.temp_file = None
-        self.tensors = dict()
+        self.tensors = []
         self.kv_data = dict()
         logger.info("gguf: This GGUF file is for {0} Endian only".format(
             "Big" if self.endianess == GGUFEndian.BIG else "Little",
         ))
         self.state = WriterState.NO_FILE
 
-        if add_architecture:
-            self.add_architecture()
+        if self.split_arguments.small_first_shard:
+            self.tensors.append(dict())
+
+        self.add_architecture()
+
+    def verify_arguments(self) -> None:
+        total_tensors = sum(len(ti) for ti in self.tensors)
+        total_size = sum(sum(GGUFWriter.get_tensor_size(ti) for ti in t.values()) for t in self.tensors)
+
+        if self.split_arguments.split_max_tensors and total_tensors < self.split_arguments.split_max_tensors:
+            logger.warning("Model has fewer tensors than the split threshold, not splitting")
+            self.split_arguments.split_style = SplitStyle.NONE
+
+        if self.split_arguments.split_max_size and total_size < self.split_arguments.split_max_size:
+            logger.warning("Model is smaller than the split threshold, not splitting")
+            self.split_arguments.split_style = SplitStyle.NONE
+
+        # no shards are created when writing vocab so make one
+        if not self.tensors:
+            self.tensors.append(dict())
+
+    def format_shard_names(self) -> list[os.PathLike[str]]:
+        pathobj = Path(self.path)
+        if self.split_arguments.split_style == SplitStyle.NONE:
+            return [pathobj]
+
+        shard_names = []
+        for i in range(len(self.tensors)):
+            shard_names.append(pathobj.with_name(SHARD_NAME_FORMAT.format(pathobj.stem, i + 1, len(self.tensors))))
+
+        return shard_names
 
     def open_output_file(self, path: os.PathLike[str] | str | None = None) -> None:
         if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
@@ -107,24 +174,52 @@ class GGUFWriter:
             self.path = path
 
         if self.path is not None:
-            if self.fout is not None:
-                self.fout.close()
-            self.fout = open(self.path, "wb")
+            self.fout = []
+            for fout in self.format_shard_names():
+                self.fout.append(open(fout, "wb"))
             self.state = WriterState.EMPTY
 
+    def print_plan(self) -> None:
+        logger.info("Writing the following files:")
+        for i in range(len(self.fout)):
logger.info(f" {self.fout[i].name}: n_tensors = {len(self.tensors[i])}, total_size = {GGUFWriter.format_n_bytes_to_str(GGUFWriter.get_tensors_total_size(self.tensors[i].values()))}") + + if self.split_arguments.dry_run: + logger.info("Dry run, not writing files") + exit() + def write_header_to_file(self, path: os.PathLike[str] | str | None = None) -> None: + self.verify_arguments() self.open_output_file(path) + self.print_plan() if self.state is not WriterState.EMPTY: raise ValueError(f'Expected output file to be empty, got {self.state}') - self._write_packed(" bytearray: + total_tensors = sum(len(t) for t in self.tensors) + kv_data += self._pack_val(Keys.Split.LLM_KV_SPLIT_NO, GGUFValueType.STRING, add_vtype=False) + kv_data += self._pack_val(shard_no, GGUFValueType.UINT16, add_vtype=True) + kv_data += self._pack_val(Keys.Split.LLM_KV_SPLIT_COUNT, GGUFValueType.STRING, add_vtype=False) + kv_data += self._pack_val(len(self.fout), GGUFValueType.UINT16, add_vtype=True) + kv_data += self._pack_val(Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT, GGUFValueType.STRING, add_vtype=False) + kv_data += self._pack_val(total_tensors, GGUFValueType.INT32, add_vtype=True) + return kv_data + def write_kv_data_to_file(self) -> None: if self.state is not WriterState.HEADER: raise ValueError(f'Expected output file to contain the header, got {self.state}') @@ -136,8 +231,16 @@ class GGUFWriter: kv_data += self._pack_val(key, GGUFValueType.STRING, add_vtype=False) kv_data += self._pack_val(val.value, val.type, add_vtype=True) - self.fout.write(kv_data) - self.flush() + if len(self.fout) > 1: + kv_data = self.add_shard_kv_data(kv_data, 0) + + # only the first shard needs kv data + self.fout[0].write(kv_data) + self.fout[0].flush() + + for i in range(1, len(self.fout)): + self.fout[i].write(self.add_shard_kv_data(bytearray(), i)) + self.fout[i].flush() self.state = WriterState.KV_DATA def write_ti_data_to_file(self) -> None: @@ -145,21 +248,23 @@ class GGUFWriter: raise ValueError(f'Expected output file to contain KV data, got {self.state}') assert self.fout is not None - ti_data = bytearray() - offset_tensor = 0 + for i in range(len(self.fout)): + assert self.fout[i] is not None + ti_data = bytearray() + offset_tensor = 0 - for name, ti in self.tensors.items(): - ti_data += self._pack_val(name, GGUFValueType.STRING, add_vtype=False) - n_dims = len(ti.shape) - ti_data += self._pack("I", n_dims) - for i in range(n_dims): - ti_data += self._pack("Q", ti.shape[n_dims - 1 - i]) - ti_data += self._pack("I", ti.dtype) - ti_data += self._pack("Q", offset_tensor) - offset_tensor += GGUFWriter.ggml_pad(ti.nbytes, self.data_alignment) + for name, ti in self.tensors[i].items(): + ti_data += self._pack_val(name, GGUFValueType.STRING, add_vtype=False) + n_dims = len(ti.shape) + ti_data += self._pack("I", n_dims) + for i in range(n_dims): + ti_data += self._pack("Q", ti.shape[n_dims - 1 - i]) + ti_data += self._pack("I", ti.dtype) + ti_data += self._pack("Q", offset_tensor) + offset_tensor += GGUFWriter.ggml_pad(ti.nbytes, self.data_alignment) - self.fout.write(ti_data) - self.flush() + self.fout[i].write(ti_data) + self.fout[i].flush() self.state = WriterState.TI_DATA def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None: @@ -248,7 +353,18 @@ class GGUFWriter: if tensor_dtype == np.uint8: tensor_shape = quant_shape_from_byte_shape(tensor_shape, raw_dtype) - self.tensors[name] = TensorInfo(shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes) + # create splits as necessary, such as to start it off + if 
+        if (len(self.tensors) == self.split_arguments.small_first_shard
+                # or split when over tensor limit
+                or (self.split_arguments.split_style == SplitStyle.TENSORS
+                    and len(self.tensors[-1]) >= self.split_arguments.split_max_tensors)
+                # or split when over size limit
+                or (self.split_arguments.split_style == SplitStyle.SIZE
+                    and GGUFWriter.get_tensors_total_size(self.tensors[-1].values()) + tensor_nbytes > self.split_arguments.split_max_size)):
+
+            self.tensors.append(dict())
+
+        self.tensors[-1][name] = TensorInfo(shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes)
 
     def add_tensor(
         self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
         raw_dtype: GGMLQuantizationType | None = None,
     ) -> None:
@@ -265,7 +381,7 @@ class GGUFWriter:
         self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype)
 
         if self.temp_file is None:
-            self.tensors[name].tensor = tensor
+            self.tensors[-1][name].tensor = tensor
             return
 
         tensor.tofile(self.temp_file)
@@ -283,9 +399,12 @@ class GGUFWriter:
         if self.endianess == GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
 
-        self.write_padding(self.fout, self.fout.tell())
-        tensor.tofile(self.fout)
-        self.write_padding(self.fout, tensor.nbytes)
+
+        for fout in self.fout:
+            assert fout is not None
+            self.write_padding(fout, fout.tell())
+            tensor.tofile(fout)
+            self.write_padding(fout, tensor.nbytes)
 
         self.state = WriterState.WEIGHTS
 
@@ -294,27 +413,31 @@ class GGUFWriter:
 
         assert self.fout is not None
 
-        self.write_padding(self.fout, self.fout.tell())
+        for fout in self.fout:
+            assert fout is not None
+            self.write_padding(fout, fout.tell())
 
         if self.temp_file is None:
-            bar = None
+            for i in range(len(self.fout)):
+                assert self.fout[i] is not None
+                bar = None
 
-            if progress:
-                from tqdm import tqdm
+                if progress:
+                    from tqdm import tqdm
 
-                total_bytes = sum(t.nbytes for t in self.tensors.values())
+                    total_bytes = GGUFWriter.get_tensors_total_size(self.tensors[i].values())
 
-                bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)
+                    bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)
 
-            # relying on the fact that Python dicts preserve insertion order (since 3.7)
-            for ti in self.tensors.values():
-                assert ti.tensor is not None  # can only iterate once over the tensors
-                assert ti.tensor.nbytes == ti.nbytes
-                ti.tensor.tofile(self.fout)
-                if bar is not None:
-                    bar.update(ti.nbytes)
-                self.write_padding(self.fout, ti.nbytes)
-                ti.tensor = None
+                # relying on the fact that Python dicts preserve insertion order (since 3.7)
+                for ti in self.tensors[i].values():
+                    assert ti.tensor is not None  # can only iterate once over the tensors
+                    assert ti.tensor.nbytes == ti.nbytes
+                    ti.tensor.tofile(self.fout[i])
+                    if bar is not None:
+                        bar.update(ti.nbytes)
+                    self.write_padding(self.fout[i], ti.nbytes)
+                    ti.tensor = None
         else:
             self.temp_file.seek(0)
@@ -326,12 +449,16 @@ class GGUFWriter:
 
     def flush(self) -> None:
         assert self.fout is not None
-        self.fout.flush()
+        for fout in self.fout:
+            assert fout is not None
+            fout.flush()
 
     def close(self) -> None:
         if self.fout is not None:
-            self.fout.close()
-            self.fout = None
+            for fout in self.fout:
+                if fout is not None:
+                    fout.close()
+            self.fout = []
 
     def add_architecture(self) -> None:
         self.add_string(Keys.General.ARCHITECTURE, self.arch)
@@ -609,6 +736,46 @@ class GGUFWriter:
 
         return kv_data
 
-    def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
-        assert self.fout is not None
-        self.fout.write(self._pack(fmt, value, skip_pack_prefix))
+    def _write_packed(self, fout: BufferedWriter, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
+        assert fout is not None
+        fout.write(self._pack(fmt, value, skip_pack_prefix))
+
+    @staticmethod
+    def get_tensor_size(tensor) -> int:
+        try:
+            return tensor.data_type.elements_to_bytes(np.prod(tensor.shape))
+        except AttributeError:  # numpy ndarray[Any, Any]
+            return tensor.nbytes
+
+    @staticmethod
+    def get_tensors_total_size(tensors) -> int:
+        return sum(GGUFWriter.get_tensor_size(ti) for ti in tensors)
+
+    @staticmethod
+    def split_str_to_n_bytes(split_str: str) -> int:
+        if split_str.endswith("K"):
+            n = int(split_str[:-1]) * 1000
+        elif split_str.endswith("M"):
+            n = int(split_str[:-1]) * 1000 * 1000
+        elif split_str.endswith("G"):
+            n = int(split_str[:-1]) * 1000 * 1000 * 1000
+        elif split_str.isnumeric():
+            n = int(split_str)
+        else:
+            raise ValueError(f"Invalid split size: {split_str}, must be a number, optionally followed by K, M, or G")
+
+        if n <= 0:
+            raise ValueError(f"Invalid split size: {split_str}, must be positive")
+
+        return n
+
+    @staticmethod
+    def format_n_bytes_to_str(num: int) -> str:
+        if num == METADATA_ONLY_INDICATOR:
+            return "negligible - metadata only"
+        fnum = float(num)
+        for unit in ("", "K", "M", "G"):
+            if abs(fnum) < 1000.0:
+                return f"{fnum:3.1f}{unit}"
+            fnum /= 1000.0
+        return f"{fnum:.1f}T - over 1TB, --split recommended"
\ No newline at end of file
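
A note on the helpers just added: split_str_to_n_bytes and format_n_bytes_to_str use decimal multipliers, so --split-max-size 2G means 2 * 10^9 bytes (2 GB), not 2^31 bytes (2 GiB). A quick illustration of the expected values:

    import gguf

    assert gguf.GGUFWriter.split_str_to_n_bytes("2G") == 2_000_000_000
    assert gguf.GGUFWriter.split_str_to_n_bytes("250M") == 250_000_000
    assert gguf.GGUFWriter.split_str_to_n_bytes("4096") == 4096   # bare byte counts work too
    assert gguf.GGUFWriter.format_n_bytes_to_str(2_000_000_000) == "2.0G"
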
diff --git a/gguf-py/gguf/gguf_writer_split.py b/gguf-py/gguf/gguf_writer_split.py
deleted file mode 100644
index bc1e9443a..000000000
--- a/gguf-py/gguf/gguf_writer_split.py
+++ /dev/null
@@ -1,245 +0,0 @@
-from __future__ import annotations
-
-import os
-import logging
-from enum import IntEnum
-from typing import TYPE_CHECKING, Any, Sequence
-from argparse import Namespace
-from collections import deque
-from dataclasses import dataclass
-from pathlib import Path
-
-import numpy as np
-
-if TYPE_CHECKING:
-    from typing_extensions import TypeAlias
-
-from .constants import (
-    GGMLQuantizationType,
-    GGUFEndian,
-    GGUFValueType
-)
-from .gguf_writer import GGUFWriter, WriterState
-from .constants import Keys
-
-logger = logging.getLogger(__name__)
-
-
-SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"
-METADATA_ONLY_INDICATOR = -1
-
-KVTempData: TypeAlias = dict[str, tuple[Any, GGUFValueType | None]]  # {key: (value, type)}
-TensorTempData: TypeAlias = tuple[str, np.ndarray[Any, Any], GGMLQuantizationType | None]  # (tensor name, tensor data, tensor dtype)
-
-
-@dataclass
-class Shard:
-    path: Path
-    tensor_count: int
-    size: int
-    tensors: deque[TensorTempData]
-
-
-class SplitStyle(IntEnum):
-    NONE = 0
-    TENSORS = 1
-    SIZE = 2
-
-
-class SplitArguments:
-    def __init__(self, args: Namespace) -> None:
-        self.split_max_tensors = args.split_max_tensors if args.split_max_tensors else 0
-        self.split_max_size = GGUFWriterSplit.split_str_to_n_bytes(args.split_max_size) if args.split_max_size else 0
-        self.split_style = SplitStyle.TENSORS if self.split_max_tensors \
-            else SplitStyle.SIZE if self.split_max_size \
-            else SplitStyle.NONE
-        self.dry_run = args.dry_run
-        self.small_first_shard = args.no_tensor_first_split
-
-
-class GGUFWriterSplit(GGUFWriter):
-    kv_data: KVTempData
-    split_arguments: SplitArguments
-    shards: list[Shard]
-    shard_writers: list[tuple[GGUFWriter, os.PathLike[str]]]
-
-    def __init__(self, path: os.PathLike[str] | str, arch: str, split_arguments: SplitArguments,
-                 use_temp_file: bool = True, endianess: GGUFEndian = GGUFEndian.LITTLE
-                 ) -> None:
-        # we intentionally don't call superclass constructor
-        self.arch = arch
-        self.path = Path(path)
-        self.endianess = endianess
-        self.kv_data = {}
-        self.shards = []
-        self.shard_writers = []
-        self.total_tensors = 0
-        self.use_temp_file = use_temp_file
-        self.split_arguments = split_arguments
-        self.recent_key = None
-        self.state = WriterState.EMPTY
-
-        if self.split_arguments.small_first_shard:
-            self.shards.append(Shard(Path(), 0, METADATA_ONLY_INDICATOR, deque()))
-
-    def init_shards(self) -> None:
-        self.total_tensors = sum(shard.tensor_count for shard in self.shards)
-        total_size = sum(shard.size for shard in self.shards)
-
-        # check if we need to split
-        if self.split_arguments.split_max_tensors and self.total_tensors < self.split_arguments.split_max_tensors:
-            logger.warning("Model has fewer tensors than the split threshold, not splitting")
-            self.split_style = SplitStyle.NONE
-
-        if self.split_arguments.split_max_size and total_size < self.split_arguments.split_max_size:
-            logger.warning("Model has smaller size than the split threshold, not splitting")
-            self.split_style = SplitStyle.NONE
-
-        # no shards are created when writing vocab so make one
-        if not self.shards:
-            self.shards.append(Shard(Path(), 0, METADATA_ONLY_INDICATOR, deque()))
-
-        # format shard names
-        if len(self.shards) == 1:
-            self.shards[0].path = self.path
-        else:
-            for i in range(len(self.shards)):
-                self.shards[i].path = self.path.with_name(SHARD_NAME_FORMAT.format(self.path.stem, i + 1, len(self.shards)))
-
-        # print shard info
-        logger.info("Writing the following files:")
-        for shard in self.shards:
-            logger.info(f"  {shard.path}: n_tensors = {shard.tensor_count}, total_size = {GGUFWriterSplit.format_n_bytes_to_str(shard.size)}")
-
-        if self.split_arguments.dry_run:
-            logger.info("Dry run, not writing files")
-            exit()
-
-        for i, shard in enumerate(self.shards):
-            # add_architecture is used for consistency - examples/gguf_split doesn't add arch to all shards
-            writer = GGUFWriter(None, self.arch, use_temp_file=self.use_temp_file,
-                                endianess=self.endianess, add_architecture=(i == 0))
-
-            # only the first shard needs all the KV data
-            if i == 0:
-                for key, (value, etype) in self.kv_data.items():
-                    writer.add_key_value(key, value, etype)
-
-            # add split metadata unless it's one file - small first shard splits even with SplitStyle.NONE
-            if self.split_arguments.split_style != SplitStyle.NONE or self.split_arguments.small_first_shard:
-                writer.add_uint16(Keys.Split.LLM_KV_SPLIT_NO, i)
-                writer.add_uint16(Keys.Split.LLM_KV_SPLIT_COUNT, len(self.shards))
-                writer.add_int32(Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT, self.total_tensors)
-
-            # add tensors, deque popleft() ensures references to eager tensors are not kept
-            while True:
-                try:
-                    (name, tensor, dtype) = shard.tensors.popleft()
-                    writer.add_tensor(name, tensor, raw_dtype=dtype)
-                except IndexError:
-                    break
-
-            self.shard_writers.append((writer, shard.path))
-
-    def write_header_to_file(self, path: os.PathLike[str] | str | None = None) -> None:
-        if self.state is not WriterState.EMPTY:
-            raise ValueError(f'Expected GGUFWriterSplit state to be EMPTY, got {self.state}')
-
-        for (writer, path) in self.shard_writers:
-            writer.write_header_to_file(path)
-
-        self.state = WriterState.HEADER
-
-    def write_kv_data_to_file(self) -> None:
-        if self.state is not WriterState.HEADER:
-            raise ValueError(f'Expected GGUFWriterSplit state to be HEADER, got {self.state}')
-
-        for (writer, _) in self.shard_writers:
-            writer.write_kv_data_to_file()
-
-        self.state = WriterState.KV_DATA
-
-    def write_tensors_to_file(self, *, progress: bool = False) -> None:
-        if self.state is not WriterState.KV_DATA:
-            raise ValueError(f'Expected GGUFWriterSplit state to be KV_DATA, got {self.state}')
-
-        running_total = self.total_tensors
-        for i in range(len(self.shard_writers)):
-            writer = self.shard_writers[i][0]
-            is_metadata = len(writer.tensors) == 0
-            if is_metadata:
-                logger.info(f"Writing to shard {i + 1}/{len(self.shards)} with metadata only")
-            else:
-                logger.info(f"Writing to shard {i + 1}/{len(self.shards)} with {len(writer.tensors)}/{running_total} remaining tensors (of {self.total_tensors} total)")
-            running_total -= len(writer.tensors)
-            writer.write_tensors_to_file(progress=(progress and not is_metadata))
-            del writer
-
-        self.state = WriterState.TI_DATA
-
-    # override add_key_value to handle kv data separately
-    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
-        self.kv_data[key] = (val, vtype)
-
-    def add_tensor(
-        self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
-        raw_dtype: GGMLQuantizationType | None = None,
-    ) -> None:
-        # we build splits as tensors are added so we need logic to figure out when to split
-        # logic is all in the conditional because it short-circuits, otherwise accessing self.shards[-1] would throw an error
-
-        # create a first shard to start it off
-        if (len(self.shards) == self.split_arguments.small_first_shard \
-            # or split when over tensor limit
-            or (self.split_arguments.split_style == SplitStyle.TENSORS \
-            and self.shards[-1].tensor_count >= self.split_arguments.split_max_tensors) \
-            # or split when over size limit
-            or (self.split_arguments.split_style == SplitStyle.SIZE \
-            and self.shards[-1].size + GGUFWriterSplit.get_tensor_size(tensor) > self.split_arguments.split_max_size)):
-
-            # we fill in the name later when we know how many shards there are
-            self.shards.append(Shard(Path(), 1, GGUFWriterSplit.get_tensor_size(tensor), deque([(name, tensor, raw_dtype)])))
-        else:
-            self.shards[-1].tensor_count += 1
-            self.shards[-1].size += GGUFWriterSplit.get_tensor_size(tensor)
-            self.shards[-1].tensors.append((name, tensor, raw_dtype))
-
-    def close(self) -> None:
-        for (writer, _) in self.shard_writers:
-            writer.close()
-
-    @staticmethod
-    def get_tensor_size(tensor) -> int:
-        try:
-            return tensor.data_type.elements_to_bytes(np.prod(tensor.shape))
-        except AttributeError:  # numpy ndarray[Any, Any]
-            return tensor.nbytes
-
-    @staticmethod
-    def split_str_to_n_bytes(split_str: str) -> int:
-        if split_str.endswith("K"):
-            n = int(split_str[:-1]) * 1000
-        elif split_str.endswith("M"):
-            n = int(split_str[:-1]) * 1000 * 1000
-        elif split_str.endswith("G"):
-            n = int(split_str[:-1]) * 1000 * 1000 * 1000
-        elif split_str.isnumeric():
-            n = int(split_str)
-        else:
-            raise ValueError(f"Invalid split size: {split_str}, must be a number, optionally followed by K, M, or G")
-
-        if n <= 0:
-            raise ValueError(f"Invalid split size: {split_str}, must be positive")
-
-        return n
-
-    @staticmethod
-    def format_n_bytes_to_str(num: int) -> str:
-        if num == METADATA_ONLY_INDICATOR:
-            return "negligible - metadata only"
-        fnum = float(num)
-        for unit in ("", "K", "M", "G"):
-            if abs(fnum) < 1000.0:
-                return f"{fnum:3.1f}{unit}"
-            fnum /= 1000.0
-        return f"{fnum:.1f}T - over 1TB, --split recommended"
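
Not part of the patch, but useful for verifying its output: the split bookkeeping each shard now carries can be read back with gguf-py's GGUFReader. This sketch assumes the three-shard example above; the key strings are the values behind Keys.Split.LLM_KV_SPLIT_NO, LLM_KV_SPLIT_COUNT, and LLM_KV_SPLIT_TENSORS_COUNT, and the field decoding follows GGUFReader's ReaderField layout:

    import gguf

    reader = gguf.GGUFReader("model-00001-of-00003.gguf")
    for key in ("split.no", "split.count", "split.tensors.count"):
        field = reader.get_field(key)
        assert field is not None
        # scalar fields keep their payload at the part index recorded in field.data
        print(key, int(field.parts[field.data[0]][0]))
    # expected for the first shard: split.no 0, split.count 3, split.tensors.count 300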