llama : replace (permute + reshape + view_1d) with (view_3d)
ggml-ci
This commit is contained in:
parent 3d9a551816
commit 5ddfbffbaf
1 changed file with 22 additions and 8 deletions
30
llama.cpp
30
llama.cpp
|
@ -1593,12 +1593,26 @@ static struct ggml_cgraph * llama_build_graph(
|
||||||
offload_func_kq(Q);
|
offload_func_kq(Q);
|
||||||
ggml_set_name(Q, "Q");
|
ggml_set_name(Q, "Q");
|
||||||
|
|
||||||
|
//struct ggml_tensor * K =
|
||||||
|
// ggml_permute(ctx0,
|
||||||
|
// ggml_reshape_3d(ctx0,
|
||||||
|
// ggml_view_1d(ctx0, kv_self.k, (n_past + N)*n_embd_gqa, il*n_ctx*ggml_element_size(kv_self.k)*n_embd_gqa),
|
||||||
|
// n_embd_head, n_head_kv, n_past + N),
|
||||||
|
// 0, 2, 1, 3);
|
||||||
|
//struct ggml_tensor * K =
|
||||||
|
// ggml_permute(ctx0,
|
||||||
|
// ggml_view_3d(ctx0, kv_self.k,
|
||||||
|
// n_embd_head, n_head_kv, n_past + N,
|
||||||
|
// ggml_element_size(kv_self.k)*n_embd_head,
|
||||||
|
// ggml_element_size(kv_self.k)*n_embd_gqa,
|
||||||
|
// ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
|
||||||
|
// 0, 2, 1, 3);
|
||||||
struct ggml_tensor * K =
|
struct ggml_tensor * K =
|
||||||
ggml_permute(ctx0,
|
ggml_view_3d(ctx0, kv_self.k,
|
||||||
ggml_reshape_3d(ctx0,
|
n_embd_head, n_past + N, n_head_kv,
|
||||||
ggml_view_1d(ctx0, kv_self.k, (n_past + N)*n_embd_gqa, il*n_ctx*ggml_element_size(kv_self.k)*n_embd_gqa),
|
ggml_element_size(kv_self.k)*n_embd_gqa,
|
||||||
n_embd_head, n_head_kv, n_past + N),
|
ggml_element_size(kv_self.k)*n_embd_head,
|
||||||
0, 2, 1, 3);
|
ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il);
|
||||||
offload_func_kq(K);
|
offload_func_kq(K);
|
||||||
ggml_set_name(K, "K");
|
ggml_set_name(K, "K");
|
||||||
|
|
||||||
|
@ -1627,9 +1641,9 @@ static struct ggml_cgraph * llama_build_graph(
|
||||||
struct ggml_tensor * V =
|
struct ggml_tensor * V =
|
||||||
ggml_view_3d(ctx0, kv_self.v,
|
ggml_view_3d(ctx0, kv_self.v,
|
||||||
n_past + N, n_embd_head, n_head_kv,
|
n_past + N, n_embd_head, n_head_kv,
|
||||||
n_ctx*ggml_element_size(kv_self.v),
|
ggml_element_size(kv_self.v)*n_ctx,
|
||||||
n_ctx*ggml_element_size(kv_self.v)*n_embd_head,
|
ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
|
||||||
n_ctx*ggml_element_size(kv_self.v)*n_embd_gqa*il);
|
ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
|
||||||
offload_func_v(V);
|
offload_func_v(V);
|
||||||
ggml_set_name(V, "V");
|
ggml_set_name(V, "V");
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue