Update convert_hf_to_gguf.py

GGUF conversion for HF1BitLLM/Llama3-8B-1.58-100B-tokens: https://huggingface.co/HF1BitLLM/Llama3-8B-1.58-100B-tokens/discussions/3
2024-09-18 20:00:25 -03:00 · 2024-09-18 20:00:25 -03:00 · 6f9d1275a0
commit 6f9d1275a0
parent 64c6af3195
1 changed file with 11 additions and 0 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -164,8 +164,19 @@ class Model:
                for name in model_part.keys():
                    if self.is_safetensors:
                        if self.lazy:
+                            if (name.endswith("_scale") and name.removesuffix("_scale") in model_part.keys()):
+                                continue
                            data = model_part.get_slice(name)
                            data = LazyTorchTensor.from_safetensors_slice(data)
+                            if (name + "_scale" in model_part.keys()):
+                                orig_shape = data.shape
+                                scale = model_part.get_slice(name + "_scale")
+                                shift = torch.tensor([0, 2, 4, 6], dtype=torch.uint8).reshape((4, *(1 for _ in range(len(orig_shape)))))
+                                data = data.unsqueeze(0).expand((4, *orig_shape)) >> shift
+                                data = data & 3
+                                data = (data.float() - 1).reshape((orig_shape[0] * 4, *orig_shape[1:]))
+                                # The scale is inverted
+                                data = data / LazyTorchTensor.from_safetensors_slice(scale).float()
                        else:
                            data = model_part.get_tensor(name)
                    else: