From 6f9d1275a017afeb129172fd99b7bbf9b971a433 Mon Sep 17 00:00:00 2001
From: Bruno Pio <913963+blap@users.noreply.github.com>
Date: Wed, 18 Sep 2024 20:00:25 -0300
Subject: [PATCH] Update convert_hf_to_gguf.py

GGUF conversion for HF1BitLLM/Llama3-8B-1.58-100B-tokens:
https://huggingface.co/HF1BitLLM/Llama3-8B-1.58-100B-tokens/discussions/3
---
 convert_hf_to_gguf.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index ff4c9226f..c37dcbccd 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -164,8 +164,19 @@ class Model:
                 for name in model_part.keys():
                     if self.is_safetensors:
                         if self.lazy:
+                            if (name.endswith("_scale") and name.removesuffix("_scale") in model_part.keys()):
+                                continue
                             data = model_part.get_slice(name)
                             data = LazyTorchTensor.from_safetensors_slice(data)
+                            if (name + "_scale" in model_part.keys()):
+                                orig_shape = data.shape
+                                scale = model_part.get_slice(name + "_scale")
+                                shift = torch.tensor([0, 2, 4, 6], dtype=torch.uint8).reshape((4, *(1 for _ in range(len(orig_shape)))))
+                                data = data.unsqueeze(0).expand((4, *orig_shape)) >> shift
+                                data = data & 3
+                                data = (data.float() - 1).reshape((orig_shape[0] * 4, *orig_shape[1:]))
+                                # The scale is inverted
+                                data = data / LazyTorchTensor.from_safetensors_slice(scale).float()
                         else:
                             data = model_part.get_tensor(name)
                     else: