From ea8b58c6cd18db632fd3b04f8046269324b91a17 Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT <pierrick.hymbert@gmail.com>
Date: Mon, 8 Apr 2024 21:10:49 +0200
Subject: [PATCH] llama: dbrx: first add the residuals and then do the norm

---
 llama.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index a9437a5b5..8db50e0fd 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7122,16 +7122,16 @@ struct llm_build_context {
                 inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
             }
 
-            cur = llm_build_norm(ctx0, cur, hparams,
-                                 model.layers[il].attn_out_norm, NULL,
-                                 LLM_NORM, cb, il);
-            cb(cur, "attn_out_norm", il);
-
             struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
             cb(ffn_inp, "ffn_inp", il);
 
             // feed-forward network
             // MoE branch
+            cur = llm_build_norm(ctx0, ffn_inp, hparams,
+                                 model.layers[il].attn_out_norm, NULL,
+                                 LLM_NORM, cb, il);
+            cb(cur, "attn_out_norm", il);
+
             cur = build_moe(cur, n_tokens, il);
 
             cur = ggml_add(ctx0, cur, ffn_inp);