From 822caa46a1e6b7f1445fe99f5fac3be5edabace9 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 3 Apr 2024 15:17:56 +0300
Subject: [PATCH] llama : produce error if imatrix size does not match

---
 llama.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index b768fd3c8..2df03f990 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13479,6 +13479,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // placeholder for the meta data
     ::zeros(fout, meta_size);
 
+    const auto tn = LLM_TN(model.arch);
+
     for (int i = 0; i < ml.n_tensors; ++i) {
         struct ggml_tensor * tensor = ml.get_tensor_meta(i);
 
@@ -13562,10 +13564,15 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 } else {
                     LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
                             int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name);
-                    // REVIEW: this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
-                    // this is a significant error and it may be good idea to abort the process if this happens,
-                    // since many people will miss the error and not realize that most of the model is being quantized without an imatrix
-                    // tok_embd should be ignored in this case, since it always causes this warning
+
+                    // this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
+                    // this is a significant error and it may be good idea to abort the process if this happens,
+                    // since many people will miss the error and not realize that most of the model is being quantized without an imatrix
+                    // tok_embd should be ignored in this case, since it always causes this warning
+                    if (name != tn(LLM_TENSOR_TOKEN_EMBD, "weight")) {
+                        throw std::runtime_error(format("imatrix size %d is different from tensor size %d for %s",
+                                int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name));
+                    }
                 }
             }
         }
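
For context, here is a minimal standalone sketch of the check this patch introduces. The names FakeTensor, check_imatrix_size, and tok_embd_name are hypothetical stand-ins for the real ggml_tensor and llama_model_quantize_internal machinery, and the tensor name and shape in main are illustrative only; what the sketch preserves from the patch is the comparison of the imatrix entry count against ne[0]*ne[2] (row size times expert count for merged MoE tensors, which is why an imatrix collected from an old split-tensor Mixtral no longer matches) and the tok_embd exemption.

#include <cstdint>
#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for ggml_tensor; only the fields the check reads.
struct FakeTensor {
    std::string name;
    int64_t ne[3]; // ne[0] = row size, ne[2] = expert count (1 for dense tensors)
};

// Sketch of the patched logic: log the mismatch, then abort unless the
// offending tensor is the token embedding (which always trips the warning).
static void check_imatrix_size(const FakeTensor & tensor,
                               const std::map<std::string, std::vector<float>> & imatrix_data,
                               const std::string & tok_embd_name) {
    const auto it = imatrix_data.find(tensor.name);
    if (it == imatrix_data.end()) {
        return; // no imatrix entry for this tensor - quantize without one
    }
    const int64_t expected = tensor.ne[0]*tensor.ne[2];
    if ((int64_t) it->second.size() != expected) {
        fprintf(stderr, "imatrix size %zu is different from tensor size %lld for %s\n",
                it->second.size(), (long long) expected, tensor.name.c_str());
        if (tensor.name != tok_embd_name) {
            throw std::runtime_error("imatrix size mismatch for " + tensor.name);
        }
    }
}

int main() {
    // Illustrative mismatch: an imatrix with one row's worth of values (14336),
    // while the merged 8-expert tensor expects ne[0]*ne[2] = 14336*8 values.
    std::map<std::string, std::vector<float>> imatrix_data;
    imatrix_data["blk.0.ffn_down.weight"] = std::vector<float>(14336);

    const FakeTensor tensor{"blk.0.ffn_down.weight", {14336, 4096, 8}};
    try {
        check_imatrix_size(tensor, imatrix_data, "token_embd.weight");
    } catch (const std::exception & e) {
        fprintf(stderr, "quantization aborted: %s\n", e.what());
        return 1;
    }
    return 0;
}

As in the patch, the hard failure (rather than the previous log-and-continue behavior) is the point: silently quantizing most of the model without an imatrix is easy to miss, so the mismatch is surfaced as an error for every tensor except tok_embd.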