llama : produce error if imatrix size does not match

2024-04-03 15:17:56 +03:00 · 2024-04-03 15:17:56 +03:00 · 822caa46a1
commit 822caa46a1
parent fc719b68cf
1 changed files with 11 additions and 4 deletions
--- a/llama.cpp
+++ b/llama.cpp
@ -13479,6 +13479,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
    // placeholder for the meta data
    ::zeros(fout, meta_size);

+    const auto tn = LLM_TN(model.arch);
+
    for (int i = 0; i < ml.n_tensors; ++i) {
        struct ggml_tensor * tensor = ml.get_tensor_meta(i);

@ -13562,10 +13564,15 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                    } else {
                        LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
                                int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name);
-                        // REVIEW: this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
-                        //         this is a significant error and it may be good idea to abort the process if this happens,
-                        //         since many people will miss the error and not realize that most of the model is being quantized without an imatrix
-                        //         tok_embd should be ignored in this case, since it always causes this warning
+
+                        // this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
+                        // this is a significant error and it may be a good idea to abort the process if this happens,
+                        // since many people will miss the error and not realize that most of the model is being quantized without an imatrix
+                        // tok_embd should be ignored in this case, since it always causes this warning
+                        if (name != tn(LLM_TENSOR_TOKEN_EMBD, "weight")) {
+                            throw std::runtime_error(format("imatrix size %d is different from tensor size %d for %s",
+                                    int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name));
+                        }
                    }
                }
            }