From b14c457fb46b25c6cc8554610264b0c234edc9b7 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Thu, 8 Feb 2024 21:22:23 -0500
Subject: [PATCH] bert : add some missing graph callbacks

---
 llama.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index 8a0189c5d..38a5fb876 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -5752,6 +5752,7 @@ struct llm_build_context {
 
         // embed layer norm
         inpL = llm_build_norm(ctx0, inpL, hparams, model.tok_norm, model.tok_norm_b, LLM_NORM, cb, -1);
+        cb(inpL, "inp_norm", -1);
 
         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
         struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
@@ -5788,6 +5789,7 @@ struct llm_build_context {
             cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_norm, model.layers[il].attn_norm_b, LLM_NORM, cb, il);
 
             struct ggml_tensor * ffn_inp = cur;
+            cb(ffn_inp, "ffn_inp", il);
 
             // feed-forward network
             cur = llm_build_ffn(ctx0, cur,
@@ -5796,6 +5798,7 @@ struct llm_build_context {
                     model.layers[il].ffn_down, model.layers[il].ffn_down_b,
                     NULL,
                     LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
+            cb(cur, "ffn_out", il);
 
             // attentions bypass the intermediate layer
             cur = ggml_add(ctx0, cur, ffn_inp);