Loading works up to LayerNorm2D

2024-04-04 22:30:17 +01:00 · 2024-04-04 22:30:17 +01:00 · e4b2e2d339
commit e4b2e2d339
parent fbab98497b
2 changed files with 25 additions and 3 deletions
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -2344,8 +2344,7 @@ class CommandR2Model(Model):

        # max_position_embeddings = 8192 in config.json but model was actually
        # trained on 128k context length
-        if "model_max_length" not in self.hparams:
-            self.hparams["model_max_length"] =  131072
+
        self.hparams["max_position_embeddings"] = self.hparams["model_max_length"]

    def set_gguf_parameters(self):
--- a/llama.cpp
+++ b/llama.cpp
@@ -924,6 +924,8 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
        },
    },
    {
@@ -5403,7 +5405,13 @@ static bool llm_load_tensors(
                        auto & layer = model.layers[i];

                        layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
-
+                        
+                        if(n_layer >= 64)
+                        {
+                            layer.attn_q_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {hparams.n_embd_head_k, hparams.n_head});
+                            layer.attn_k_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {hparams.n_embd_head_k, hparams.n_head_kv});
+                        }
+                        
                        layer.wq = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q,   "weight", i), {n_embd, n_embd});
                        layer.wk = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K,   "weight", i), {n_embd, n_embd_gqa});
                        layer.wv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V,   "weight", i), {n_embd, n_embd_gqa});
@@ -9452,6 +9460,21 @@ struct llm_build_context {
                    cb(Vcur, "Vcur", il);
                }

+                if(model.layers[il].attn_q_norm)
+                {
+                    Qcur = llm_build_norm(ctx0, Qcur, hparams,
+                                model.layers[il].attn_q_norm,
+                                NULL,
+                                LLM_NORM, cb, il);
+                    cb(Qcur, "Qcur", il);
+
+                    Kcur = llm_build_norm(ctx0, Kcur, hparams,
+                            model.layers[il].attn_k_norm,
+                            NULL,
+                            LLM_NORM, cb, il);
+                    cb(Kcur, "Kcur", il);
+                }
+
                Qcur = ggml_rope_custom(
                    ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
                    n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,