convert-hf : omit output.weight when identical with token_embd.weight

Only for Mamba for now, but it might be relevant for other models eventually. Most Mamba models actually share these two tensors, albeit implicitly.
2024-03-08 10:06:33 -05:00 · 2024-03-08 10:06:33 -05:00 · d0d32dced9
commit d0d32dced9
parent 1c8ea55843
1 changed file with 13 additions and 0 deletions
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1913,6 +1913,11 @@ class MambaModel(Model):
    def write_tensors(self):
        block_count = self.hparams["n_layer"]
        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+
+        tok_embd = None
+        tok_embd_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.TOKEN_EMBD] + ".weight"
+        output_name   = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.OUTPUT]     + ".weight"
+
        for name, data_torch in self.get_tensors():
            old_dtype = data_torch.dtype

@@ -1930,6 +1935,14 @@ class MambaModel(Model):
                print("A_log --> A ==> " + new_name)
                data_torch = -torch.exp(data_torch)

+            # assuming token_embd.weight is seen before output.weight
+            if tok_embd is not None and new_name == output_name:
+                if torch.equal(tok_embd, data_torch):
+                    print(f"{output_name} is equivalent to {tok_embd_name}, omitting")
+                    continue
+            if new_name == tok_embd_name:
+                tok_embd = data_torch
+
            data = data_torch.squeeze().numpy()

            n_dims = len(data.shape)