From 9d7f694438f347d0bd1f79044ef6fa565caeb236 Mon Sep 17 00:00:00 2001
From: Christian Zhou-Zheng
Date: Sun, 9 Jun 2024 16:02:23 -0400
Subject: [PATCH] fix typing and clean up

---
 convert-hf-to-gguf.py       |  6 ++-
 gguf-py/gguf/gguf_writer.py | 95 +++++++++++++------------------------
 2 files changed, 39 insertions(+), 62 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index d1ecfbe8a..e1cb05fea 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -66,7 +66,7 @@ class Model:
     model_arch: gguf.MODEL_ARCH
 
     def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool,
-                 model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = 0, small_first_shard: bool = 0):
+                 model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
         self.dir_model = dir_model
@@ -2875,6 +2875,10 @@ def main() -> None:
         "auto": gguf.LlamaFileType.GUESSED,
     }
 
+    if args.use_temp_file and (args.split_max_tensors > 0 or args.split_max_size != "0"):
+        logger.error("Error: Cannot use temp file when splitting")
+        sys.exit(1)
+
     if args.outfile is not None:
         fname_out = args.outfile
     else:
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index f5860533d..19556c552 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -9,7 +9,7 @@ from dataclasses import dataclass
 from enum import Enum, auto
 from pathlib import Path
 from io import BufferedWriter
-from typing import IO, Any, Sequence, Mapping, TypeAlias
+from typing import IO, Any, Sequence, Mapping
 from string import ascii_letters, digits
 
 import numpy as np
@@ -33,11 +33,6 @@ logger = logging.getLogger(__name__)
 
 SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"
 
-NUM_SHARD_KV_DATA = 3
-METADATA_ONLY_INDICATOR = -1
-
-KVTempData: TypeAlias = dict[str, tuple[Any, GGUFValueType | None]]  # {key: (value, type)}
-TensorTempData: TypeAlias = tuple[str, np.ndarray[Any, Any], GGMLQuantizationType | None]  # (tensor name, tensor data, tensor dtype)
 
 
 @dataclass
@@ -65,7 +60,7 @@ class WriterState(Enum):
 
 class GGUFWriter:
     fout: list[BufferedWriter] | None
-    path: os.PathLike[str] | str | None
+    path: Path | None
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None
     tensors: list[dict[str, TensorInfo]]
     kv_data: list[dict[str, GGUFValue]]
@@ -88,15 +83,15 @@ def __init__(
         self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE,
         split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False
     ):
-        self.fout = []
-        self.path = path
+        self.fout = None
+        self.path = Path(path) if path else None
         self.arch = arch
         self.endianess = endianess
         self.data_alignment = GGUF_DEFAULT_ALIGNMENT
         self.use_temp_file = use_temp_file
         self.temp_file = None
-        self.tensors = []
-        self.kv_data = [dict()]
+        self.tensors = [{}]
+        self.kv_data = [{}]
         self.split_max_tensors = split_max_tensors
         self.split_max_size = split_max_size
         self.dry_run = dry_run
@@ -107,30 +102,16 @@
         self.state = WriterState.NO_FILE
 
         if self.small_first_shard:
-            self.tensors.append(dict())
+            self.tensors.append({})
 
         self.add_architecture()
 
-    def verify_arguments(self) -> None:
+    def format_shard_names(self, path: Path) -> list[Path]:
         if len(self.tensors) == 1:
-            logger.warning("Model fails split requirements, not splitting")
+            return [path]
+        return [path.with_name(SHARD_NAME_FORMAT.format(path.stem, i + 1, len(self.tensors))) for i in range(len(self.tensors))]
 
-        # no shards are created when writing vocab so make one
-        if not self.tensors or len(self.tensors) == 0:
-            self.tensors = [dict()]
-
-    def format_shard_names(self, path: os.PathLike[str] | str | None = None) -> list[os.PathLike[str]]:
-        pathobj = Path(path)
-        if len(self.tensors) == 1:
-            return [pathobj]
-
-        shard_names = []
-        for i in range(len(self.tensors)):
-            shard_names.append(pathobj.with_name(SHARD_NAME_FORMAT.format(pathobj.stem, i + 1, len(self.tensors))))
-
-        return shard_names
-
-    def open_output_file(self, path: os.PathLike[str] | str | None = None) -> None:
+    def open_output_file(self, path: Path | None = None) -> None:
         if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
             # allow calling this multiple times as long as the path is the same
             return
@@ -141,14 +122,14 @@
             self.path = path
 
         if self.path is not None:
-            self.fout = []
-            for fout in self.format_shard_names(self.path):
-                self.fout.append(open(fout, "wb"))
+            self.print_plan()
+            self.fout = [open(filename, "wb") for filename in self.format_shard_names(self.path)]
             self.state = WriterState.EMPTY
 
-    def print_plan(self, path: os.PathLike[str] | str | None = None) -> None:
+    def print_plan(self) -> None:
         logger.info("Writing the following files:")
-        filenames = self.format_shard_names(path)
+        assert self.path is not None
+        filenames = self.format_shard_names(self.path)
         assert len(filenames) == len(self.tensors)
         for name, tensors in zip(filenames, self.tensors):
             logger.info(f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}")
@@ -162,24 +143,28 @@
             return
 
         total_tensors = sum(len(t) for t in self.tensors)
-        for i in range(len(self.fout)):
+        assert self.fout is not None
+        total_splits = len(self.fout)
+        for i in range(total_splits):
             # just see whether it exists
             try:
                 self.kv_data[i]
             except IndexError:
                 self.kv_data.append(dict())
             self.kv_data[i][Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(i, GGUFValueType.UINT16)
-            self.kv_data[i][Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(len(self.fout), GGUFValueType.UINT16)
+            self.kv_data[i][Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(total_splits, GGUFValueType.UINT16)
             self.kv_data[i][Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(total_tensors, GGUFValueType.INT32)
 
-    def write_header_to_file(self, path: os.PathLike[str] | str | None = None) -> None:
-        self.verify_arguments()
-        self.print_plan(path)
+    def write_header_to_file(self, path: Path | None = None) -> None:
+        if len(self.tensors) == 1:
+            logger.warning("Model fails split requirements, not splitting")
+
         self.open_output_file(path)
 
         if self.state is not WriterState.EMPTY:
             raise ValueError(f'Expected output file to be empty, got {self.state}')
 
+        assert self.fout is not None
         assert len(self.fout) == len(self.tensors)
         assert len(self.kv_data) == 1
 
@@ -216,7 +201,6 @@
 
         assert self.fout is not None
         for fout, tensors in zip(self.fout, self.tensors):
-            assert fout is not None
             ti_data = bytearray()
             offset_tensor = 0
 
@@ -235,7 +219,7 @@
         self.state = WriterState.TI_DATA
 
     def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
-        if key in self.kv_data:
+        if any(key in kv_data for kv_data in self.kv_data):
             raise ValueError(f'Duplicated key name {key!r}')
 
         self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
 
@@ -279,9 +263,6 @@
         self.add_key_value(key, val, GGUFValueType.STRING)
 
     def add_array(self, key: str, val: Sequence[Any]) -> None:
-        if not isinstance(val, Sequence):
-            raise ValueError("Value must be a sequence for array type")
-
         self.add_key_value(key, val, GGUFValueType.ARRAY)
 
     @staticmethod
@@ -295,9 +276,8 @@
         if self.state is not WriterState.NO_FILE:
             raise ValueError(f'Expected output file to be not yet opened, got {self.state}')
 
-        for tensors in self.tensors:
-            if name in tensors:
-                raise ValueError(f'Duplicated tensor name {name!r}')
+        if any(name in tensors for tensors in self.tensors):
+            raise ValueError(f'Duplicated tensor name {name!r}')
 
         if raw_dtype is None:
             if tensor_dtype == np.float16:
@@ -321,10 +301,8 @@
         if tensor_dtype == np.uint8:
             tensor_shape = quant_shape_from_byte_shape(tensor_shape, raw_dtype)
 
-        # create splits as necessary, such as to start it off
-        if (len(self.tensors) == self.small_first_shard \
-            # or split when over tensor limit
-            or self.split_max_tensors != 0 and \
+        # split when over tensor limit
+        if (self.split_max_tensors != 0 and \
             len(self.tensors[-1]) >= self.split_max_tensors \
             # or split when over size limit
             or self.split_max_size != 0 and \
@@ -369,7 +347,6 @@
             tensor.byteswap(inplace=True)
 
         for fout in self.fout:
-            assert fout is not None
             self.write_padding(fout, fout.tell())
             tensor.tofile(fout)
             self.write_padding(fout, tensor.nbytes)
@@ -382,12 +359,10 @@
         assert self.fout is not None
 
         for fout in self.fout:
-            assert fout is not None
             self.write_padding(fout, fout.tell())
 
         if self.temp_file is None:
             for fout, tensors in zip(self.fout, self.tensors):
-                assert fout is not None
                 bar = None
 
                 if progress:
@@ -409,7 +384,8 @@
         else:
             self.temp_file.seek(0)
 
-            shutil.copyfileobj(self.temp_file, self.fout)
+            assert self.fout is not None
+            shutil.copyfileobj(self.temp_file, self.fout[0 if not self.small_first_shard else 1])
             self.flush()
             self.temp_file.close()
 
@@ -418,14 +394,12 @@
     def flush(self) -> None:
         assert self.fout is not None
         for fout in self.fout:
-            assert fout is not None
             fout.flush()
 
     def close(self) -> None:
         if self.fout is not None:
             for fout in self.fout:
-                if fout is not None:
-                    fout.close()
+                fout.close()
             self.fout = []
 
     def add_architecture(self) -> None:
@@ -705,12 +679,11 @@
         return kv_data
 
     def _write_packed(self, fout: BufferedWriter, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
-        assert fout is not None
        fout.write(self._pack(fmt, value, skip_pack_prefix))
 
     @staticmethod
     def format_n_bytes_to_str(num: int) -> str:
-        if num == METADATA_ONLY_INDICATOR:
+        if num == 0:
             return "negligible - metadata only"
         fnum = float(num)
         for unit in ("", "K", "M", "G"):
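
Note: the shard naming that format_shard_names() implements above can be sketched in isolation. This is an illustrative sketch, not code from the patch: the helper name shard_names and the explicit n_shards parameter are hypothetical stand-ins for GGUFWriter.format_shard_names() and len(self.tensors); only the SHARD_NAME_FORMAT template is taken verbatim from gguf_writer.py.

    # Minimal standalone sketch of the shard naming scheme.
    # Assumptions: `shard_names` and `n_shards` are placeholders for
    # GGUFWriter.format_shard_names() and len(self.tensors) in the patch.
    from pathlib import Path

    SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"  # same template as gguf_writer.py

    def shard_names(path: Path, n_shards: int) -> list[Path]:
        if n_shards == 1:
            # model fails split requirements: keep the requested filename
            return [path]
        return [
            path.with_name(SHARD_NAME_FORMAT.format(path.stem, i + 1, n_shards))
            for i in range(n_shards)
        ]

    print(shard_names(Path("model.gguf"), 3))
    # e.g. [PosixPath('model-00001-of-00003.gguf'),
    #       PosixPath('model-00002-of-00003.gguf'),
    #       PosixPath('model-00003-of-00003.gguf')]

These are the same names that print_plan() logs before any file is opened, alongside per-shard tensor counts and sizes.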