From 5ddfbffbaff1b6acffc966c0848c4d3a86031d45 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 7 Aug 2023 12:32:58 +0300
Subject: [PATCH] llama : replace (permute + reshape + view_1d) with (view_3d)

ggml-ci
---
 llama.cpp | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 39aefd499..4dc380aa7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1593,12 +1593,26 @@ static struct ggml_cgraph * llama_build_graph(
             offload_func_kq(Q);
             ggml_set_name(Q, "Q");
 
+            //struct ggml_tensor * K =
+            //    ggml_permute(ctx0,
+            //            ggml_reshape_3d(ctx0,
+            //                ggml_view_1d(ctx0, kv_self.k, (n_past + N)*n_embd_gqa, il*n_ctx*ggml_element_size(kv_self.k)*n_embd_gqa),
+            //                n_embd_head, n_head_kv, n_past + N),
+            //            0, 2, 1, 3);
+            //struct ggml_tensor * K =
+            //    ggml_permute(ctx0,
+            //            ggml_view_3d(ctx0, kv_self.k,
+            //                n_embd_head, n_head_kv, n_past + N,
+            //                ggml_element_size(kv_self.k)*n_embd_head,
+            //                ggml_element_size(kv_self.k)*n_embd_gqa,
+            //                ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
+            //            0, 2, 1, 3);
             struct ggml_tensor * K =
-                ggml_permute(ctx0,
-                        ggml_reshape_3d(ctx0,
-                            ggml_view_1d(ctx0, kv_self.k, (n_past + N)*n_embd_gqa, il*n_ctx*ggml_element_size(kv_self.k)*n_embd_gqa),
-                            n_embd_head, n_head_kv, n_past + N),
-                        0, 2, 1, 3);
+                ggml_view_3d(ctx0, kv_self.k,
+                        n_embd_head, n_past + N, n_head_kv,
+                        ggml_element_size(kv_self.k)*n_embd_gqa,
+                        ggml_element_size(kv_self.k)*n_embd_head,
+                        ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il);
             offload_func_kq(K);
             ggml_set_name(K, "K");
 
@@ -1627,9 +1641,9 @@ static struct ggml_cgraph * llama_build_graph(
             struct ggml_tensor * V =
                 ggml_view_3d(ctx0, kv_self.v,
                         n_past + N, n_embd_head, n_head_kv,
-                        n_ctx*ggml_element_size(kv_self.v),
-                        n_ctx*ggml_element_size(kv_self.v)*n_embd_head,
-                        n_ctx*ggml_element_size(kv_self.v)*n_embd_gqa*il);
+                        ggml_element_size(kv_self.v)*n_ctx,
+                        ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
+                        ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
             offload_func_v(V);
             ggml_set_name(V, "V");
 