diff --git a/llama.cpp b/llama.cpp index 50109e95f..aff381659 100644 --- a/llama.cpp +++ b/llama.cpp @@ -6622,6 +6622,7 @@ static struct ggml_tensor * llm_build_kqv( const int64_t n_embd_head_k = hparams.n_embd_head_k; const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa(); const int64_t n_embd_head_v = hparams.n_embd_head_v; + const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa(); struct ggml_tensor * q = ggml_permute(ctx, q_cur, 0, 2, 1, 3); cb(q, "q", il); @@ -6644,8 +6645,8 @@ static struct ggml_tensor * llm_build_kqv( struct ggml_tensor * v = ggml_view_3d(ctx, kv.v_l[il], n_embd_head_v, n_kv, n_head_kv, - ggml_row_size(kv.v_l[il]->type, n_embd_k_gqa), - ggml_row_size(kv.v_l[il]->type, n_embd_head_k), + ggml_row_size(kv.v_l[il]->type, n_embd_v_gqa), + ggml_row_size(kv.v_l[il]->type, n_embd_head_v), 0); cb(v, "v", il);