From bb5ee0209621ee9343554b9ab5907c968ae47e98 Mon Sep 17 00:00:00 2001
From: Christian Zhou-Zheng
Date: Wed, 5 Jun 2024 12:49:08 -0400
Subject: [PATCH] simplify even further and standardize with GGUFWriter

---
 convert-hf-to-gguf.py        |  2 +-
 gguf-py/gguf/gguf_manager.py | 41 ++++++++++--------------------
 2 files changed, 12 insertions(+), 31 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 4b3dfdd70..d12373c41 100644
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -331,7 +331,7 @@ class Model:
         self.write_tensors()
         self.gguf_writer.write_header_to_file()
         self.gguf_writer.write_kv_data_to_file()
-        self.gguf_writer.write_ti_data_to_file()
+        self.gguf_writer.write_tensors_to_file()
         self.gguf_writer.close()
 
     def write_vocab(self):
diff --git a/gguf-py/gguf/gguf_manager.py b/gguf-py/gguf/gguf_manager.py
index aeec9642c..002d13877 100644
--- a/gguf-py/gguf/gguf_manager.py
+++ b/gguf-py/gguf/gguf_manager.py
@@ -73,33 +73,24 @@ class SplitStrategy(deque):
                 self.append((shard, model[start:stop], GGUFWriter(shard, arch, use_temp_file=use_temp_file, endianess=endianess)))
 
         elif split_arguments.split_style == SplitStyle.SIZE:
-            shards = []
+            shards = [[model[0]]]
 
             # we have to determine the shards first to determine how many shards there will be in total - two passes
-            for i, shard in enumerate(model):
-                if i == 0:
-                    shards.append([shard])
-                    continue
+            for i, shard in enumerate(model[1:]):
                 if SplitStrategy.get_tensor_size(shard[1]) + sum(SplitStrategy.get_tensor_size(t[1]) for t in shards[-1]) > split_arguments.split_max_size:
                     shards.append([shard])
                 else:
                     shards[-1].append(shard)
 
-            total_shards = len(shards) + split_arguments.small_first_shard
-            shard_offset = 1
-
             if split_arguments.small_first_shard:
-                outname = fname_out.with_name(SHARD_NAME_FORMAT.format(fname_out.stem, shard_offset, total_shards))
-                self.append((outname, None, GGUFWriter(outname, arch, use_temp_file=use_temp_file, endianess=endianess)))
-                shard_offset += 1
+                shards.insert(0, None)
 
             for i, shard in enumerate(shards):
-                outname = fname_out.with_name(SHARD_NAME_FORMAT.format(fname_out.stem, i + shard_offset, total_shards))
+                outname = fname_out.with_name(SHARD_NAME_FORMAT.format(fname_out.stem, i + 1, len(shards)))
                 self.append((outname, shard, GGUFWriter(outname, arch, use_temp_file=use_temp_file, endianess=endianess)))
 
     @staticmethod
     def get_tensor_size(tensor) -> int:
-        # we don't have the LazyTensor class here from convert.py but we can try
         try:
             return tensor.data_type.elements_to_bytes(np.prod(tensor.shape))
         except AttributeError: # numpy ndarray[Any, Any]
@@ -213,7 +204,7 @@ class GGUFManager(GGUFWriter):
 
         self.state = WriterState.KV_DATA
 
-    def write_ti_data_to_file(self) -> None:
+    def write_tensors_to_file(self) -> None:
         if self.split_arguments.dry_run:
             return
 
@@ -222,25 +213,17 @@ class GGUFManager(GGUFWriter):
 
         running_total = self.total_tensors
         for ct in range(self.total_shards):
-            try:
-                (_, tensors, writer) = self.split_strategy.popleft()
-                tensors = deque(tensors) if tensors else None
-            except IndexError:
-                break
+            (_, tensors, writer) = self.split_strategy.popleft()
+            tensors = deque(tensors) if tensors else None
 
             shard_num_tensors = len(tensors) if tensors else 0
-
-            if tensors:
-                while True:
-                    try:
-                        (name, tensor, dtype) = tensors.popleft()
-                    except IndexError:
-                        break
-                    writer.add_tensor(name, tensor, raw_dtype=dtype)
-
             print(f"Writing to shard {ct}/{self.total_shards} with {shard_num_tensors}/{running_total} remaining tensors (of {self.total_tensors} total)")
             running_total -= shard_num_tensors
 
+            for _ in range(shard_num_tensors):
+                (name, tensor, dtype) = tensors.popleft()
+                writer.add_tensor(name, tensor, raw_dtype=dtype)
+
             # need to write everything down here
             writer.write_header_to_file()
             writer.write_kv_data_to_file()
@@ -268,8 +251,6 @@ class GGUFManager(GGUFWriter):
     def add_tensor(
         self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None,
     ) -> None:
-        if self.endianess == GGUFEndian.BIG:
-            tensor.byteswap(inplace=True)
         self.tensors.append((name, tensor, raw_dtype))
 
     def close(self) -> None:
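
As a quick illustration of the SplitStyle.SIZE path after this change: the shard list is seeded with the first tensor, and each following tensor either joins the shard currently being filled or starts a new one when it would push that shard past split_max_size. A minimal standalone sketch of that greedy grouping, using made-up (name, size-in-bytes) pairs instead of real GGUF tensors (this is not part of the patch, just the same loop shape in isolation):

# Stand-in data: (name, size) pairs rather than actual tensors.
model = [("a", 5), ("b", 3), ("c", 4), ("d", 6)]
split_max_size = 8

# Seed with the first tensor, as shards = [[model[0]]] does in the patch.
shards = [[model[0]]]
for item in model[1:]:
    # Start a new shard when adding this tensor would exceed the cap,
    # otherwise append it to the last (current) shard.
    if item[1] + sum(t[1] for t in shards[-1]) > split_max_size:
        shards.append([item])
    else:
        shards[-1].append(item)

print(shards)  # [[('a', 5), ('b', 3)], [('c', 4)], [('d', 6)]]

If small_first_shard were set, a None placeholder would then be inserted at index 0, exactly as shards.insert(0, None) does in the patch, so the metadata-only first shard is counted by len(shards) without any separate shard_offset bookkeeping.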