From 822caa46a1e6b7f1445fe99f5fac3be5edabace9 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 3 Apr 2024 15:17:56 +0300
Subject: [PATCH] llama : produce error if imatrix size does not match

---
 llama.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index b768fd3c8..2df03f990 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13479,6 +13479,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // placeholder for the meta data
     ::zeros(fout, meta_size);
 
+    const auto tn = LLM_TN(model.arch);
+
     for (int i = 0; i < ml.n_tensors; ++i) {
         struct ggml_tensor * tensor = ml.get_tensor_meta(i);
 
@@ -13562,10 +13564,15 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 } else {
                     LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
                             int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name);
-                    // REVIEW: this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
-                    // this is a significant error and it may be good idea to abort the process if this happens,
-                    // since many people will miss the error and not realize that most of the model is being quantized without an imatrix
-                    // tok_embd should be ignored in this case, since it always causes this warning
+
+                    // this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
+                    // this is a significant error and it may be good idea to abort the process if this happens,
+                    // since many people will miss the error and not realize that most of the model is being quantized without an imatrix
+                    // tok_embd should be ignored in this case, since it always causes this warning
+                    if (name != tn(LLM_TENSOR_TOKEN_EMBD, "weight")) {
+                        throw std::runtime_error(format("imatrix size %d is different from tensor size %d for %s",
+                                int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name));
+                    }
                 }
             }
         }
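
For context, here is a minimal standalone sketch of the check this patch introduces. The names FakeTensor, check_imatrix_size, and tok_embd_name are hypothetical stand-ins for the real ggml_tensor and llama_model_quantize_internal machinery, and the tensor name and shape in main are illustrative only; what the sketch preserves from the patch is the comparison of the imatrix entry count against ne[0]*ne[2] (row size times expert count for merged MoE tensors, which is why an imatrix collected from an old split-tensor Mixtral no longer matches) and the tok_embd exemption.

#include <cstdint>
#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for ggml_tensor; only the fields the check reads.
struct FakeTensor {
    std::string name;
    int64_t ne[3]; // ne[0] = row size, ne[2] = expert count (1 for dense tensors)
};

// Sketch of the patched logic: log the mismatch, then abort unless the
// offending tensor is the token embedding (which always trips the warning).
static void check_imatrix_size(const FakeTensor & tensor,
                               const std::map<std::string, std::vector<float>> & imatrix_data,
                               const std::string & tok_embd_name) {
    const auto it = imatrix_data.find(tensor.name);
    if (it == imatrix_data.end()) {
        return; // no imatrix entry for this tensor - quantize without one
    }
    const int64_t expected = tensor.ne[0]*tensor.ne[2];
    if ((int64_t) it->second.size() != expected) {
        fprintf(stderr, "imatrix size %zu is different from tensor size %lld for %s\n",
                it->second.size(), (long long) expected, tensor.name.c_str());
        if (tensor.name != tok_embd_name) {
            throw std::runtime_error("imatrix size mismatch for " + tensor.name);
        }
    }
}

int main() {
    // Illustrative mismatch: an imatrix with one row's worth of values (14336),
    // while the merged 8-expert tensor expects ne[0]*ne[2] = 14336*8 values.
    std::map<std::string, std::vector<float>> imatrix_data;
    imatrix_data["blk.0.ffn_down.weight"] = std::vector<float>(14336);

    const FakeTensor tensor{"blk.0.ffn_down.weight", {14336, 4096, 8}};
    try {
        check_imatrix_size(tensor, imatrix_data, "token_embd.weight");
    } catch (const std::exception & e) {
        fprintf(stderr, "quantization aborted: %s\n", e.what());
        return 1;
    }
    return 0;
}

As in the patch, the hard failure (rather than the previous log-and-continue behavior) is the point: silently quantizing most of the model without an imatrix is easy to miss, so the mismatch is surfaced as an error for every tensor except tok_embd.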