From fa568f6a8293327f448a0aa065946b26130ea825 Mon Sep 17 00:00:00 2001
From: nopperl <54780682+nopperl@users.noreply.github.com>
Date: Thu, 18 Jul 2024 09:30:40 +0200
Subject: [PATCH] check for k norm separately

---
 src/llama.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 138294600..2a1556ea8 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -13863,17 +13863,20 @@ struct llm_build_context {
                         ggml_element_size(Qcur) * n_embd_head * n_head,
                         0);
                 cb(Qcur, "Qcur", il);
-                Kcur = ggml_view_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens,
-                        ggml_element_size(Kcur) * n_embd_head,
-                        ggml_element_size(Kcur) * n_embd_head * n_head_kv,
-                        0);
-                cb(Kcur, "Kcur", il);
 
                 Qcur = llm_build_norm(ctx0, Qcur, hparams,
                         model.layers[il].attn_q_norm,
                         model.layers[il].attn_q_norm_b,
                         LLM_NORM, cb, il);
                 cb(Qcur, "Qcur", il);
+            }
+
+            if (model.layers[il].attn_k_norm) {
+                Kcur = ggml_view_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens,
+                        ggml_element_size(Kcur) * n_embd_head,
+                        ggml_element_size(Kcur) * n_embd_head * n_head_kv,
+                        0);
+                cb(Kcur, "Kcur", il);
 
                 Kcur = llm_build_norm(ctx0, Kcur, hparams,
                         model.layers[il].attn_k_norm,
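
For reference, the control flow this patch produces looks roughly like the
following. This is a minimal sketch, not the literal file contents: the cb()
graph callbacks are elided, the opening Qcur = ggml_view_3d(...) line and the
attn_k_norm_b bias argument are assumed by analogy with the surrounding
context, since the hunk shows only their tails before it truncates.

    // Q norm and K norm are now guarded independently, so a model that
    // ships only one of the two norm tensors still builds a valid graph.
    if (model.layers[il].attn_q_norm) {
        // reshape Qcur to (n_embd_head, n_head, n_tokens), then normalize
        Qcur = ggml_view_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens,
                ggml_element_size(Qcur) * n_embd_head,
                ggml_element_size(Qcur) * n_embd_head * n_head,
                0);
        Qcur = llm_build_norm(ctx0, Qcur, hparams,
                model.layers[il].attn_q_norm,
                model.layers[il].attn_q_norm_b,
                LLM_NORM, cb, il);
    }

    if (model.layers[il].attn_k_norm) {
        // reshape Kcur to (n_embd_head, n_head_kv, n_tokens), then normalize;
        // note the K path uses n_head_kv, which may differ from n_head (GQA)
        Kcur = ggml_view_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens,
                ggml_element_size(Kcur) * n_embd_head,
                ggml_element_size(Kcur) * n_embd_head * n_head_kv,
                0);
        Kcur = llm_build_norm(ctx0, Kcur, hparams,
                model.layers[il].attn_k_norm,
                model.layers[il].attn_k_norm_b,
                LLM_NORM, cb, il);
    }

Before this patch, the Kcur reshape and its cb() call sat inside the Q-norm
branch, so the two checks could not be satisfied independently; the change
moves them under their own attn_k_norm guard.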