llama : use n_embd_head_v instead of n_embd_head_k when reshaping kqv

2024-05-16 13:42:24 +02:00 · 2024-05-16 13:42:24 +02:00 · f15e933fb1
commit f15e933fb1
parent 9afdffe70e
1 changed file with 2 additions and 2 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -6655,7 +6655,7 @@ static struct ggml_tensor * llm_build_kqv(
            ggml_flash_attn_ext_set_prec(cur, GGML_PREC_F32);
        }

-        cur = ggml_reshape_2d(ctx, cur, n_embd_head_k*n_head, n_tokens);
+        cur = ggml_reshape_2d(ctx, cur, n_embd_head_v*n_head, n_tokens);
    } else {
        struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
        cb(kq, "kq", il);
@@ -6700,7 +6700,7 @@ static struct ggml_tensor * llm_build_kqv(
        struct ggml_tensor * kqv_merged = ggml_permute(ctx, kqv, 0, 2, 1, 3);
        cb(kqv_merged, "kqv_merged", il);

-        cur = ggml_cont_2d(ctx, kqv_merged, n_embd_head_k*n_head, n_tokens);
+        cur = ggml_cont_2d(ctx, kqv_merged, n_embd_head_v*n_head, n_tokens);
        cb(cur, "kqv_merged_cont", il);
    }