From f342143e92c6adcf45db6a6eef605f0d837646f2 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Date: Fri, 12 Jan 2024 17:41:07 +0200
Subject: [PATCH] imatrix: guard even more against low-bit quantization misuse

---
 llama.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index bc7fccb5e..2e6c61732 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -9240,6 +9240,15 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                     }
                 }
             }
+            if ((new_type == GGML_TYPE_IQ2_XXS ||
+                 new_type == GGML_TYPE_IQ2_XS  ||
+                (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
+                fprintf(stderr, "\n\n============================================================\n");
+                fprintf(stderr, "Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
+                fprintf(stderr, "The result will be garbage, so bailing out\n");
+                fprintf(stderr, "============================================================\n\n");
+                throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
+            }
 
             float * f32_data;