From f342143e92c6adcf45db6a6eef605f0d837646f2 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Fri, 12 Jan 2024 17:41:07 +0200 Subject: [PATCH] imatrix: guard even more against low-bit quantization misuse --- llama.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llama.cpp b/llama.cpp index bc7fccb5e..2e6c61732 100644 --- a/llama.cpp +++ b/llama.cpp @@ -9240,6 +9240,15 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s } } } + if ((new_type == GGML_TYPE_IQ2_XXS || + new_type == GGML_TYPE_IQ2_XS || + (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) { + fprintf(stderr, "\n\n============================================================\n"); + fprintf(stderr, "Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name); + fprintf(stderr, "The result will be garbage, so bailing out\n"); + fprintf(stderr, "============================================================\n\n"); + throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name)); + } float * f32_data;