diff --git a/llama.cpp b/llama.cpp index a9437a5b5..8db50e0fd 100644 --- a/llama.cpp +++ b/llama.cpp @@ -7122,16 +7122,16 @@ struct llm_build_context { inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } - cur = llm_build_norm(ctx0, cur, hparams, - model.layers[il].attn_out_norm, NULL, - LLM_NORM, cb, il); - cb(cur, "attn_out_norm", il); - struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); cb(ffn_inp, "ffn_inp", il); // feed-forward network // MoE branch + cur = llm_build_norm(ctx0, cur, hparams, + model.layers[il].attn_out_norm, NULL, + LLM_NORM, cb, il); + cb(cur, "attn_out_norm", il); + cur = build_moe(cur, n_tokens, il); cur = ggml_add(ctx0, cur, ffn_inp);