llama : produce error if imatrix size does not match

This commit is contained in:
Georgi Gerganov 2024-04-03 15:17:56 +03:00
parent fc719b68cf
commit 822caa46a1
No known key found for this signature in database
GPG key ID: 449E073F9DC10735

View file

@ -13479,6 +13479,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
// placeholder for the meta data
::zeros(fout, meta_size);
const auto tn = LLM_TN(model.arch);
for (int i = 0; i < ml.n_tensors; ++i) {
struct ggml_tensor * tensor = ml.get_tensor_meta(i);
@ -13562,10 +13564,15 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
} else {
LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name);
// REVIEW: this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
// this is a significant error and it may be good idea to abort the process if this happens,
// since many people will miss the error and not realize that most of the model is being quantized without an imatrix
// tok_embd should be ignored in this case, since it always causes this warning
// this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
// this is a significant error and it may be good idea to abort the process if this happens,
// since many people will miss the error and not realize that most of the model is being quantized without an imatrix
// tok_embd should be ignored in this case, since it always causes this warning
if (name != tn(LLM_TENSOR_TOKEN_EMBD, "weight")) {
throw std::runtime_error(format("imatrix size %d is different from tensor size %d for %s",
int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name));
}
}
}
}