Update llama.cpp - exception for the IQ2_S token embedding error

2024-03-26 10:11:46 +01:00 · 2024-03-26 10:11:46 +01:00 · eaf9571d9b
commit eaf9571d9b
parent d1839362fc
1 changed file with 1 addition and 1 deletion
--- a/llama.cpp
+++ b/llama.cpp
@@ -12984,8 +12984,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
            }
            if ((new_type == GGML_TYPE_IQ2_XXS ||
                 new_type == GGML_TYPE_IQ2_XS  ||
                 new_type == GGML_TYPE_IQ2_S   ||
                 new_type == GGML_TYPE_IQ1_S   ||
                (new_type == GGML_TYPE_IQ2_S && strcmp(tensor->name, "token_embd.weight"))  ||
                (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
                LLAMA_LOG_ERROR("\n\n============================================================\n");
                LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);