llama : replace ggml_new_tensor_3d + ggml_set_inplace + ggml_set_inplace with single ggml_concat in build_deepseek2()

This commit is contained in:
Stanisław Szymczyk 2024-05-28 11:15:17 +02:00
parent 98ff6e1b45
commit 841cd47432

View file

@@ -11305,18 +11305,11 @@ struct llm_build_context {
); );
cb(k_pe, "k_pe", il); cb(k_pe, "k_pe", il);
struct ggml_tensor * q_states = ggml_new_tensor_3d(ctx0, q_nope->type, hparams.n_embd_head_k, n_head, n_tokens); struct ggml_tensor * q_states = ggml_concat(ctx0, q_nope, q_pe, 0);
cb(q_states, "q_states", il); cb(q_states, "q_states", il);
q_states = ggml_set_inplace(ctx0, q_states, q_nope, q_states->nb[1], q_states->nb[2], q_states->nb[3], 0);
q_states = ggml_set_inplace(ctx0, q_states, q_pe, q_states->nb[1], q_states->nb[2], q_states->nb[3], ggml_element_size(q_states) * n_embd_head_qk_nope);
k_pe = ggml_repeat(ctx0, k_pe, q_pe); struct ggml_tensor * k_states = ggml_concat(ctx0, k_nope, ggml_repeat(ctx0, k_pe, q_pe), 0);
cb(k_pe, "k_pe", il);
struct ggml_tensor * k_states = ggml_new_tensor_3d(ctx0, q_nope->type, hparams.n_embd_head_k, n_head, n_tokens);
cb(k_states, "k_states", il); cb(k_states, "k_states", il);
k_states = ggml_set_inplace(ctx0, k_states, k_nope, k_states->nb[1], k_states->nb[2], k_states->nb[3], 0);
k_states = ggml_set_inplace(ctx0, k_states, k_pe, k_states->nb[1], k_states->nb[2], k_states->nb[3], ggml_element_size(k_states) * n_embd_head_qk_nope);
cur = llm_build_kv(ctx0, model, hparams, cparams, kv_self, gf, cur = llm_build_kv(ctx0, model, hparams, cparams, kv_self, gf,
model.layers[il].wo, NULL, model.layers[il].wo, NULL,