From 54f808db2bae036a370a4b990e8fabe8aa8aced0 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Mon, 29 May 2023 22:09:46 +0300
Subject: [PATCH] Quantization mixes: didn't quite get what I wanted in the
 last commit

---
 llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 34ca296ea..7f6b86ab4 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -587,6 +587,7 @@ struct llama_file_saver {
             case GGML_TYPE_Q5_1:
             case GGML_TYPE_Q8_0:
             case GGML_TYPE_Q3_K:
+            case GGML_TYPE_Q4_K:
             case GGML_TYPE_Q5_K:
             case GGML_TYPE_Q6_K:
                 break;
@@ -2172,8 +2173,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                      (i_feed_forward_w2 - n_feed_forward_w2/8)%3 == 2)) new_type = GGML_TYPE_Q6_K;
                 ++i_feed_forward_w2;
             }
-            else if (tensor.name.find("feed_forward.w3.weight") != std::string::npos ||
-                     tensor.name.find("attention.wo.weight") != std::string::npos) {
+            else if (tensor.name.find("attention.wo.weight") != std::string::npos) {
                 if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) new_type = GGML_TYPE_Q4_K;
                 else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
             }
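
For context, here is a minimal standalone sketch of the per-tensor type selection this patch leaves in place for the Q3_K mixes: attention.wo.weight is still promoted to a higher-bit type, while feed_forward.w3.weight (dropped from the condition above) now stays at the mix's base type. This is illustrative only, not a verbatim llama.cpp excerpt; the function name pick_type and the trimmed enums are assumptions.

// Simplified sketch of the post-patch selection logic (hypothetical helper;
// enums trimmed to the values relevant here).
#include <string>

enum ggml_type  { GGML_TYPE_Q3_K, GGML_TYPE_Q4_K, GGML_TYPE_Q5_K };
enum llama_ftype { LLAMA_FTYPE_MOSTLY_Q3_K_M, LLAMA_FTYPE_MOSTLY_Q3_K_L };

// Returns the quantization type for a tensor under the given quant mix.
static ggml_type pick_type(const std::string & name, llama_ftype ftype) {
    ggml_type new_type = GGML_TYPE_Q3_K; // base type of the Q3_K mixes
    // After this patch only attention.wo is promoted; before it,
    // feed_forward.w3 was promoted as well.
    if (name.find("attention.wo.weight") != std::string::npos) {
        if      (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) new_type = GGML_TYPE_Q4_K;
        else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
    }
    return new_type;
}

The first hunk is the matching serialization fix: GGML_TYPE_Q4_K must be accepted by llama_file_saver's switch, since the Q3_K_M mix can now emit Q4_K tensors.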