From d0d32dced9dd206c645241fd44487980301d33b4 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Fri, 8 Mar 2024 10:06:33 -0500
Subject: [PATCH] convert-hf : omit output.weight when identical with
 token_embd.weight

Only for Mamba for now, but it might be relevant for other models
eventually. Most Mamba models actually share these two tensors, albeit
implicitly.
---
 convert-hf-to-gguf.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 3318be35c..5eee32016 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1913,6 +1913,11 @@ class MambaModel(Model):
     def write_tensors(self):
         block_count = self.hparams["n_layer"]
         tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+
+        tok_embd = None
+        tok_embd_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.TOKEN_EMBD] + ".weight"
+        output_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.OUTPUT] + ".weight"
+
         for name, data_torch in self.get_tensors():
             old_dtype = data_torch.dtype

@@ -1930,6 +1935,14 @@ class MambaModel(Model):
                 print("A_log --> A ==> " + new_name)
                 data_torch = -torch.exp(data_torch)

+            # assuming token_embd.weight is seen before output.weight
+            if tok_embd is not None and new_name == output_name:
+                if torch.equal(tok_embd, data_torch):
+                    print(f"{output_name} is equivalent to {tok_embd_name}, omitting")
+                    continue
+            if new_name == tok_embd_name:
+                tok_embd = data_torch
+
             data = data_torch.squeeze().numpy()
             n_dims = len(data.shape)
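
Note: below is a minimal standalone sketch of the omission check, for
illustration only; the dict stands in for self.get_tensors() and the
tensor shape is arbitrary, but the tensor names match the GGUF names
used above.

    import torch

    # hypothetical stand-in for self.get_tensors(); shape is arbitrary
    tensors = {
        "token_embd.weight": torch.randn(1000, 64),
    }
    # simulate a model whose output projection is a copy of the embedding
    tensors["output.weight"] = tensors["token_embd.weight"].clone()

    tok_embd = None
    for new_name, data_torch in tensors.items():
        # omit output.weight when it is an exact copy of token_embd.weight
        if tok_embd is not None and new_name == "output.weight":
            if torch.equal(tok_embd, data_torch):
                print(f"{new_name} is equivalent to token_embd.weight, omitting")
                continue
        if new_name == "token_embd.weight":
            tok_embd = data_torch
        print("writing " + new_name)

As in the patch, this relies on token_embd.weight being seen before
output.weight; Python dicts preserve insertion order, so the sketch
satisfies that assumption.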