From eaf9571d9b85712d9e37843e76abaa9ad85bc3a8 Mon Sep 17 00:00:00 2001
From: Nexesenex <124105151+Nexesenex@users.noreply.github.com>
Date: Tue, 26 Mar 2024 10:11:46 +0100
Subject: [PATCH] Update llama.cpp - exception for the IQ2_S token embedding error

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 68f18453b..b940ea3d2 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -12984,8 +12984,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             }
             if ((new_type == GGML_TYPE_IQ2_XXS ||
                  new_type == GGML_TYPE_IQ2_XS ||
-                 new_type == GGML_TYPE_IQ2_S ||
                  new_type == GGML_TYPE_IQ1_S ||
+                (new_type == GGML_TYPE_IQ2_S && strcmp(tensor->name, "token_embd.weight")) ||
                 (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
                 LLAMA_LOG_ERROR("\n\n============================================================\n");
                 LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);