From 0f3bf7c96b568267ab86be86c998609cfdc539b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?=
Date: Thu, 23 May 2024 08:34:11 +0200
Subject: [PATCH] llama : add comments for clarity, change confusing variable name

---
 llama.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 8e764dab5..f2a1ff8b1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -10743,9 +10743,12 @@ struct llm_build_context {
                 inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
             }
 
-            // FF
+            // ffn
             if (hparams.use_par_res) {
-                struct ggml_tensor * ffn_inp = cur;
+                // attention and ffn are computed in parallel
+                // x = x + attn(ln1(x)) + ffn(ln2(x))
+
+                struct ggml_tensor * attn_out = cur;
 
                 cur = llm_build_norm(ctx0, inpL, hparams,
                         model.layers[il].ffn_norm,
@@ -10764,10 +10767,13 @@ struct llm_build_context {
                 cur = ggml_add(ctx0, cur, inpL);
                 cb(cur, "ffn_out", il);
 
-                inpL = ggml_add(ctx0, cur, ffn_inp);
+                inpL = ggml_add(ctx0, cur, attn_out);
                 cb(inpL, "l_out", il);
             } else {
-                // Add the input
+                // attention and ffn are computed sequentially
+                // x = x + attn(ln1(x))
+                // x = x + ffn(ln2(x))
+
                 struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpL);
                 cb(ffn_inp, "ffn_inp", il);
 