From ea8b58c6cd18db632fd3b04f8046269324b91a17 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Mon, 8 Apr 2024 21:10:49 +0200 Subject: [PATCH] llama: dbrx: first add the residuals and then do the norm --- llama.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llama.cpp b/llama.cpp index a9437a5b5..8db50e0fd 100644 --- a/llama.cpp +++ b/llama.cpp @@ -7122,16 +7122,16 @@ struct llm_build_context { inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } - cur = llm_build_norm(ctx0, cur, hparams, - model.layers[il].attn_out_norm, NULL, - LLM_NORM, cb, il); - cb(cur, "attn_out_norm", il); - struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); cb(ffn_inp, "ffn_inp", il); // feed-forward network // MoE branch + cur = llm_build_norm(ctx0, cur, hparams, + model.layers[il].attn_out_norm, NULL, + LLM_NORM, cb, il); + cb(cur, "attn_out_norm", il); + cur = build_moe(cur, n_tokens, il); cur = ggml_add(ctx0, cur, ffn_inp);