From 596a4aec86b0db7c397ac9c8e407a7ff0a9fdd4f Mon Sep 17 00:00:00 2001
From: Nexesenex <124105151+Nexesenex@users.noreply.github.com>
Date: Thu, 22 Aug 2024 19:12:25 +0200
Subject: [PATCH] Readd variable attn_k, attn_q, attn_o after merge

---
 src/llama.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/llama.cpp b/src/llama.cpp
index 6191741ff..a91ed5c60 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17328,6 +17328,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             n_attn_layer *= 3;
         }
         GGML_ASSERT((qs.n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
+        GGML_ASSERT((qs.n_attention_wk == n_attn_layer) && "n_attention_wk is unexpected");
+        GGML_ASSERT((qs.n_attention_wq == n_attn_layer) && "n_attention_wq is unexpected");
+        GGML_ASSERT((qs.n_attention_wo == n_attn_layer) && "n_attention_wo is unexpected");
     }
 
     size_t total_size_org = 0;