From 35a0b974e32df2ef2433de42f44ab01b6e5c55f0 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Tue, 22 Aug 2023 08:51:13 +0300
Subject: [PATCH] Fix after rebasing on master

---
 llama.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 0947918e6..ebec2a4f0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3718,7 +3718,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 }
             } else if (name.find("attn_v.weight") != std::string::npos) {
                 if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q4_K;
-            } else if (tensor.name.find("attention.wv.weight") != std::string::npos) {
+            } else if (name.find("attention.wv.weight") != std::string::npos) {
                 if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
                 else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
                     new_type = i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
@@ -3733,7 +3733,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 ++i_attention_wv;
             } else if (name.find("ffn_down.weight") != std::string::npos) {
                 if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q4_K;
-            } else if (tensor.name.find("feed_forward.w2.weight") != std::string::npos) {
+            } else if (name.find("feed_forward.w2.weight") != std::string::npos) {
                 if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
                 else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
                     new_type = i_feed_forward_w2 < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
@@ -3745,12 +3745,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 ++i_feed_forward_w2;
             } else if (name.find("attn_output.weight") != std::string::npos) {
                 if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q4_K;
-            } else if (tensor.name.find("attention.wo.weight") != std::string::npos) {
+            } else if (name.find("attention.wo.weight") != std::string::npos) {
                 if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K ) new_type = GGML_TYPE_Q3_K;
                 else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) new_type = GGML_TYPE_Q4_K;
                 else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
             }
-            else if (tensor.name.find("feed_forward.w") != std::string::npos) {
+            else if (name.find("feed_forward.w") != std::string::npos) {
                 if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
             }
             // This can be used to reduce the size of the Q5_K_S model.