From 841cd47432affbca30a62aa0f9429a80599074d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?= Date: Tue, 28 May 2024 11:15:17 +0200 Subject: [PATCH] llama : replace ggml_new_tensor_3d + ggml_set_inplace + ggml_set_inplace with single ggml_concat in build_deepseek2() --- llama.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/llama.cpp b/llama.cpp index cef5bfdde..9c80a6211 100644 --- a/llama.cpp +++ b/llama.cpp @@ -11305,18 +11305,11 @@ struct llm_build_context { ); cb(k_pe, "k_pe", il); - struct ggml_tensor * q_states = ggml_new_tensor_3d(ctx0, q_nope->type, hparams.n_embd_head_k, n_head, n_tokens); + struct ggml_tensor * q_states = ggml_concat(ctx0, q_nope, q_pe, 0); cb(q_states, "q_states", il); - q_states = ggml_set_inplace(ctx0, q_states, q_nope, q_states->nb[1], q_states->nb[2], q_states->nb[3], 0); - q_states = ggml_set_inplace(ctx0, q_states, q_pe, q_states->nb[1], q_states->nb[2], q_states->nb[3], ggml_element_size(q_states) * n_embd_head_qk_nope); - k_pe = ggml_repeat(ctx0, k_pe, q_pe); - cb(k_pe, "k_pe", il); - - struct ggml_tensor * k_states = ggml_new_tensor_3d(ctx0, q_nope->type, hparams.n_embd_head_k, n_head, n_tokens); + struct ggml_tensor * k_states = ggml_concat(ctx0, k_nope, ggml_repeat(ctx0, k_pe, q_pe), 0); cb(k_states, "k_states", il); - k_states = ggml_set_inplace(ctx0, k_states, k_nope, k_states->nb[1], k_states->nb[2], k_states->nb[3], 0); - k_states = ggml_set_inplace(ctx0, k_states, k_pe, k_states->nb[1], k_states->nb[2], k_states->nb[3], ggml_element_size(k_states) * n_embd_head_qk_nope); cur = llm_build_kv(ctx0, model, hparams, cparams, kv_self, gf, model.layers[il].wo, NULL,