From 639b374b1a86d3d1e7d374586ed312d3887ff6a9 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Wed, 1 May 2024 19:02:34 -0400
Subject: [PATCH] convert-hf : convert norms to f32 by default

---
 convert-hf-to-gguf.py | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 1dec1e583..a846b63fe 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -199,17 +199,21 @@ class Model(Protocol):
         if self.ftype == 0 and data_dtype == np.float16:
             data = data.astype(np.float32)
 
-        # when both are true, the tensor keeps its original type
+        # when both are True, f32 should win
         extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
         extra_f16 = self.extra_f16_tensors(name, new_name, bid, n_dims)
 
-        # 1d tensors need to be converted to float32
-        # Most of the codebase that takes in 1D tensors only handles F32 tensors
-        if self.ftype == 1 and data_dtype == np.float16 and (n_dims == 1 or extra_f32) and not extra_f16:
-            data = data.astype(np.float32)
+        # Most of the codebase that takes in 1D tensors or norms only handles F32 tensors
+        extra_f32 = extra_f32 or n_dims == 1 or new_name.endswith("_norm.weight")
 
         # if f16 desired, convert any float32 2-dim weight tensors to float16
-        if self.ftype == 1 and data_dtype == np.float32 and (name.endswith(".weight") and n_dims >= 2 or extra_f16) and not extra_f32:
+        extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)
+
+        # when both extra_f32 and extra_f16 are False, convert to float32 by default
+        if self.ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
+            data = data.astype(np.float32)
+
+        if self.ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
             data = data.astype(np.float16)
 
         # reverse shape to make it similar to the internal ggml dimension order
@@ -1100,11 +1104,6 @@ class StableLMModel(Model):
 
         return [(self.map_tensor_name(name), data_torch)]
 
-    def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
-        del name, bid, n_dims  # unused
-
-        return new_name.endswith("_norm.weight")
-
     def _stack_qk_norm(self, bid: int, n_head: int, norms: dict[str, Tensor], layer_name: str = "q_layernorm"):
         datas: list[Tensor] = []
         # extract the norms in order
@@ -1505,11 +1504,6 @@ class Qwen2MoeModel(Model):
 
         return [(self.map_tensor_name(name), data_torch)]
 
-    def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
-        del name, bid, n_dims  # unused
-
-        return new_name.endswith("_norm.weight")
-
     def write_tensors(self):
         super().write_tensors()
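
Note for reviewers: below is a minimal, self-contained sketch of the dtype-selection rules this patch introduces. The `choose_dtype` helper and the sample tensor names are hypothetical, written only to illustrate the logic that actually lives inline in `Model.write_tensors`: when both flags end up set, f32 wins; 1D tensors and `*_norm.weight` tensors are always widened to f32 (which is why the per-model `extra_f32_tensors` overrides in `StableLMModel` and `Qwen2MoeModel` become redundant and are removed); and an f16 tensor not explicitly eligible for f16 now falls back to f32 by default.

```python
import numpy as np

# Hypothetical helper mirroring the patched logic (ftype == 1 means
# "f16 output desired"; extra_f32/extra_f16 are the per-model hints).
def choose_dtype(name: str, new_name: str, n_dims: int, data_dtype,
                 extra_f32: bool, extra_f16: bool, ftype: int = 1):
    # Most of the codebase that takes in 1D tensors or norms only handles F32
    extra_f32 = extra_f32 or n_dims == 1 or new_name.endswith("_norm.weight")
    # only 2D+ .weight tensors are eligible for f16
    extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)

    if ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
        return np.float32  # f32 wins over f16, and is also the new default
    if ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
        return np.float16
    return data_dtype      # otherwise keep the original type

# norms are forced to f32 even when stored as f16 upstream
assert choose_dtype("model.layers.0.input_layernorm.weight",
                    "blk.0.attn_norm.weight", 1, np.float16,
                    False, False) == np.float32
# ordinary 2D weight tensors are still converted to f16
assert choose_dtype("model.layers.0.mlp.up_proj.weight",
                    "blk.0.ffn_up.weight", 2, np.float32,
                    False, False) == np.float16
```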