minor tweaks

2024-07-01 15:29:04 -07:00 · 2024-07-01 15:29:04 -07:00 · 8b64c7ae46
commit 8b64c7ae46
parent 2d4de517bb
2 changed files with 13 additions and 9 deletions
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@ -2972,6 +2972,8 @@ class JaisModel(Model):
        else:
            assert False

+        self.max_alibi_bias = 8.0
+
    def set_vocab(self):
        self._set_vocab_gpt2()

@ -2985,12 +2987,6 @@ class JaisModel(Model):
        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
        self.gguf_writer.add_file_type(self.ftype)

-        # Hack to populate self.tensor_names
-        all(self.get_tensors())
-        if 'transformer.relative_pe.slopes' not in self.tensor_names:
-            self.gguf_writer.add_max_alibi_bias(8.0)
-        # else set later
-
    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        del bid  # unused

@ -3001,11 +2997,14 @@ class JaisModel(Model):
            return tensors

        if name.endswith(("relative_pe.slopes")):
-            # calculate ALiBi bias
+            # Calculate max ALiBi bias (this is the inverse of the ALiBi calculation)
+            # Some other models have max_alibi_bias spelled out explicitly in the hyperparams,
+            # but Jais's PyTorch model simply precalculates the slope values and places them
+            # in relative_pe.slopes
            n_head_closest_log2 = 2 ** math.floor(math.log2(self.hparams["n_head"]))
            first_val = float(data_torch._data[0])
-            alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
-            self.gguf_writer.add_max_alibi_bias(alibi_bias)
+            self.max_alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
+
            return tensors

        if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
@ -3025,6 +3024,10 @@ class JaisModel(Model):

        return tensors

+    def write_tensors(self):
+        super().write_tensors()
+        self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
+

 ###### CONVERSION LOGIC ######

--- a/src/llama.cpp
+++ b/src/llama.cpp
@ -6942,6 +6942,7 @@ static bool llm_load_tensors(
            case LLM_ARCH_BITNET:
                {
                    model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+
                    // output
                    {
                        model.output_norm = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});