fix typing and clean up

2024-06-09 16:02:23 -04:00 · 2024-06-09 16:02:23 -04:00 · 9d7f694438
commit 9d7f694438
parent f7ecd99691
2 changed files with 39 additions and 62 deletions
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@ -66,7 +66,7 @@ class Model:
    model_arch: gguf.MODEL_ARCH

    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool,
-                 model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = 0, small_first_shard: bool = 0):
+                 model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
        if type(self) is Model:
            raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
        self.dir_model = dir_model
@ -2875,6 +2875,10 @@ def main() -> None:
        "auto": gguf.LlamaFileType.GUESSED,
    }

+    if args.use_temp_file and (args.split_max_tensors > 0 or args.split_max_size != "0"):
+        logger.error("Error: Cannot use temp file when splitting")
+        sys.exit(1)
+
    if args.outfile is not None:
        fname_out = args.outfile
    else:
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@ -9,7 +9,7 @@ from dataclasses import dataclass
 from enum import Enum, auto
 from pathlib import Path
 from io import BufferedWriter
-from typing import IO, Any, Sequence, Mapping, TypeAlias
+from typing import IO, Any, Sequence, Mapping
 from string import ascii_letters, digits

 import numpy as np
@ -33,11 +33,6 @@ logger = logging.getLogger(__name__)


 SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"
-NUM_SHARD_KV_DATA = 3
-METADATA_ONLY_INDICATOR = -1
-
-KVTempData: TypeAlias = dict[str, tuple[Any, GGUFValueType | None]] # {key: (value, type)}
-TensorTempData: TypeAlias = tuple[str, np.ndarray[Any, Any], GGMLQuantizationType | None] # (tensor name, tensor data, tensor dtype)


@dataclass
@ -65,7 +60,7 @@ class WriterState(Enum):

 class GGUFWriter:
    fout: list[BufferedWriter] | None
-    path: os.PathLike[str] | str | None
+    path: Path | None
    temp_file: tempfile.SpooledTemporaryFile[bytes] | None
    tensors: list[dict[str, TensorInfo]]
    kv_data: list[dict[str, GGUFValue]]
@ -88,15 +83,15 @@ class GGUFWriter:
        self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE,
        split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False
    ):
-        self.fout = []
-        self.path = path
+        self.fout = None
+        self.path = Path(path) if path else None
        self.arch = arch
        self.endianess = endianess
        self.data_alignment = GGUF_DEFAULT_ALIGNMENT
        self.use_temp_file = use_temp_file
        self.temp_file = None
-        self.tensors = []
-        self.kv_data = [dict()]
+        self.tensors = [{}]
+        self.kv_data = [{}]
        self.split_max_tensors = split_max_tensors
        self.split_max_size = split_max_size
        self.dry_run = dry_run
@ -107,30 +102,16 @@ class GGUFWriter:
        self.state = WriterState.NO_FILE

        if self.small_first_shard:
-            self.tensors.append(dict())
+            self.tensors.append({})

        self.add_architecture()

-    def verify_arguments(self) -> None:
+    def format_shard_names(self, path: Path) -> list[Path]:
        if len(self.tensors) == 1:
-            logger.warning("Model fails split requirements, not splitting")
+            return [path]
+        return [path.with_name(SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))) for i in range(len(self.tensors))]

-        # no shards are created when writing vocab so make one
-        if not self.tensors or len(self.tensors) == 0:
-            self.tensors = [dict()]
-
-    def format_shard_names(self, path: os.PathLike[str] | str | None = None) -> list[os.PathLike[str]]:
-        pathobj = Path(path)
-        if len(self.tensors) == 1:
-            return [pathobj]
-
-        shard_names = []
-        for i in range(len(self.tensors)):
-            shard_names.append(pathobj.with_name(SHARD_NAME_FORMAT.format(pathobj.stem, i + 1, len(self.tensors))))
-
-        return shard_names
-
-    def open_output_file(self, path: os.PathLike[str] | str | None = None) -> None:
+    def open_output_file(self, path: Path | None = None) -> None:
        if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
            # allow calling this multiple times as long as the path is the same
            return
@ -141,14 +122,14 @@ class GGUFWriter:
            self.path = path

        if self.path is not None:
-            self.fout = []
-            for fout in self.format_shard_names(self.path):
-                self.fout.append(open(fout, "wb"))
+            self.print_plan()
+            self.fout = [open(filename, "wb") for filename in self.format_shard_names(self.path)]
            self.state = WriterState.EMPTY

-    def print_plan(self, path: os.PathLike[str] | str | None = None) -> None:
+    def print_plan(self) -> None:
        logger.info("Writing the following files:")
-        filenames = self.format_shard_names(path)
+        assert self.path is not None
+        filenames = self.format_shard_names(self.path)
        assert len(filenames) == len(self.tensors)
        for name, tensors in zip(filenames, self.tensors):
            logger.info(f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}")
@ -162,24 +143,28 @@ class GGUFWriter:
            return

        total_tensors = sum(len(t) for t in self.tensors)
-        for i in range(len(self.fout)):
+        assert self.fout is not None
+        total_splits = len(self.fout)
+        for i in range(total_splits):
            # just see whether it exists
            try:
                self.kv_data[i]
            except IndexError:
                self.kv_data.append(dict())
            self.kv_data[i][Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(i, GGUFValueType.UINT16)
-            self.kv_data[i][Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(len(self.fout), GGUFValueType.UINT16)
+            self.kv_data[i][Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(total_splits, GGUFValueType.UINT16)
            self.kv_data[i][Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(total_tensors, GGUFValueType.INT32)

-    def write_header_to_file(self, path: os.PathLike[str] | str | None = None) -> None:
-        self.verify_arguments()
-        self.print_plan(path)
+    def write_header_to_file(self, path: Path | None = None) -> None:
+        if len(self.tensors) == 1:
+            logger.warning("Model fails split requirements, not splitting")
+
        self.open_output_file(path)

        if self.state is not WriterState.EMPTY:
            raise ValueError(f'Expected output file to be empty, got {self.state}')

+        assert self.fout is not None
        assert len(self.fout) == len(self.tensors)
        assert len(self.kv_data) == 1

@ -216,7 +201,6 @@ class GGUFWriter:
        assert self.fout is not None

        for fout, tensors in zip(self.fout, self.tensors):
-            assert fout is not None
            ti_data = bytearray()
            offset_tensor = 0

@ -235,7 +219,7 @@ class GGUFWriter:
        self.state = WriterState.TI_DATA

    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
-        if key in self.kv_data:
+        if any(key in kv_data for kv_data in self.kv_data):
            raise ValueError(f'Duplicated key name {key!r}')

        self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
@ -279,9 +263,6 @@ class GGUFWriter:
        self.add_key_value(key, val, GGUFValueType.STRING)

    def add_array(self, key: str, val: Sequence[Any]) -> None:
-        if not isinstance(val, Sequence):
-            raise ValueError("Value must be a sequence for array type")
-
        self.add_key_value(key, val, GGUFValueType.ARRAY)

    @staticmethod
@ -295,9 +276,8 @@ class GGUFWriter:
        if self.state is not WriterState.NO_FILE:
            raise ValueError(f'Expected output file to be not yet opened, got {self.state}')

-        for tensors in self.tensors:
-            if name in tensors:
-                raise ValueError(f'Duplicated tensor name {name!r}')
+        if any(name in tensors for tensors in self.tensors):
+            raise ValueError(f'Duplicated tensor name {name!r}')

        if raw_dtype is None:
            if tensor_dtype == np.float16:
@ -321,10 +301,8 @@ class GGUFWriter:
            if tensor_dtype == np.uint8:
                tensor_shape = quant_shape_from_byte_shape(tensor_shape, raw_dtype)

-        # create splits as necessary, such as to start it off
-        if (len(self.tensors) == self.small_first_shard \
-            # or split when over tensor limit
-            or self.split_max_tensors != 0 and \
+        # split when over tensor limit
+        if (self.split_max_tensors != 0 and \
                len(self.tensors[-1]) >= self.split_max_tensors \
            # or split when over size limit
            or self.split_max_size != 0 and \
@ -369,7 +347,6 @@ class GGUFWriter:
            tensor.byteswap(inplace=True)

        for fout in self.fout:
-            assert fout is not None
            self.write_padding(fout, fout.tell())
            tensor.tofile(fout)
            self.write_padding(fout, tensor.nbytes)
@ -382,12 +359,10 @@ class GGUFWriter:
        assert self.fout is not None

        for fout in self.fout:
-            assert fout is not None
            self.write_padding(fout, fout.tell())

        if self.temp_file is None:
            for fout, tensors in zip(self.fout, self.tensors):
-                assert fout is not None
                bar = None

                if progress:
@ -409,7 +384,8 @@ class GGUFWriter:
        else:
            self.temp_file.seek(0)

-            shutil.copyfileobj(self.temp_file, self.fout)
+            assert self.fout is not None
+            shutil.copyfileobj(self.temp_file, self.fout[0 if not self.small_first_shard else 1])
            self.flush()
            self.temp_file.close()

@ -418,14 +394,12 @@ class GGUFWriter:
    def flush(self) -> None:
        assert self.fout is not None
        for fout in self.fout:
-            assert fout is not None
            fout.flush()

    def close(self) -> None:
        if self.fout is not None:
            for fout in self.fout:
-                if fout is not None:
-                    fout.close()
+                fout.close()
            self.fout = []

    def add_architecture(self) -> None:
@ -705,12 +679,11 @@ class GGUFWriter:
        return kv_data

    def _write_packed(self, fout: BufferedWriter, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
-        assert fout is not None
        fout.write(self._pack(fmt, value, skip_pack_prefix))

    @staticmethod
    def format_n_bytes_to_str(num: int) -> str:
-        if num == METADATA_ONLY_INDICATOR:
+        if num == 0:
            return "negligible - metadata only"
        fnum = float(num)
        for unit in ("", "K", "M", "G"):