From 6f9d1275a017afeb129172fd99b7bbf9b971a433 Mon Sep 17 00:00:00 2001
From: Bruno Pio <913963+blap@users.noreply.github.com>
Date: Wed, 18 Sep 2024 20:00:25 -0300
Subject: [PATCH] Update convert_hf_to_gguf.py

GGUF conversion for HF1BitLLM/Llama3-8B-1.58-100B-tokens:
https://huggingface.co/HF1BitLLM/Llama3-8B-1.58-100B-tokens/discussions/3
---
 convert_hf_to_gguf.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index ff4c9226f..c37dcbccd 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -164,8 +164,19 @@ class Model:
                 for name in model_part.keys():
                     if self.is_safetensors:
                         if self.lazy:
+                            if (name.endswith("_scale") and name.removesuffix("_scale") in model_part.keys()):
+                                continue
                             data = model_part.get_slice(name)
                             data = LazyTorchTensor.from_safetensors_slice(data)
+                            if (name + "_scale" in model_part.keys()):
+                                orig_shape = data.shape
+                                scale = model_part.get_slice(name + "_scale")
+                                shift = torch.tensor([0, 2, 4, 6], dtype=torch.uint8).reshape((4, *(1 for _ in range(len(orig_shape)))))
+                                data = data.unsqueeze(0).expand((4, *orig_shape)) >> shift
+                                data = data & 3
+                                data = (data.float() - 1).reshape((orig_shape[0] * 4, *orig_shape[1:]))
+                                # The scale is inverted
+                                data = data / LazyTorchTensor.from_safetensors_slice(scale).float()
                         else:
                             data = model_part.get_tensor(name)
                     else: