llama : produce error if imatrix size does not match
commit 822caa46a1 (parent: fc719b68cf)

1 changed file with 11 additions and 4 deletions
llama.cpp

@@ -13479,6 +13479,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // placeholder for the meta data
     ::zeros(fout, meta_size);
 
+    const auto tn = LLM_TN(model.arch);
+
     for (int i = 0; i < ml.n_tensors; ++i) {
         struct ggml_tensor * tensor = ml.get_tensor_meta(i);
 
@@ -13562,10 +13564,15 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                     } else {
                         LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
                                 int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name);
-                        // REVIEW: this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
-                        //         this is a significant error and it may be good idea to abort the process if this happens,
-                        //         since many people will miss the error and not realize that most of the model is being quantized without an imatrix
-                        //         tok_embd should be ignored in this case, since it always causes this warning
+
+                        // this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
+                        // this is a significant error and it may be good idea to abort the process if this happens,
+                        // since many people will miss the error and not realize that most of the model is being quantized without an imatrix
+                        // tok_embd should be ignored in this case, since it always causes this warning
+                        if (name != tn(LLM_TENSOR_TOKEN_EMBD, "weight")) {
+                            throw std::runtime_error(format("imatrix size %d is different from tensor size %d for %s",
+                                    int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name));
+                        }
                     }
                 }
             }
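Below is a minimal, self-contained sketch of the check this diff turns into a hard error. The MiniTensor struct, the find_imatrix() helper, and the warning text are hypothetical simplifications, not llama.cpp's real ggml_tensor, quantization loop, or format() helper; what it keeps from the diff is the expected imatrix size of ne[0]*ne[2] (row width times expert count), the throw on a mismatch, and the exemption for the token embedding, whose mismatch is expected and therefore only logged.

// Hypothetical sketch of the validation added by this commit; names and
// layout are simplified stand-ins for llama.cpp internals.
#include <cstdint>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

struct MiniTensor {
    std::string name;
    int64_t     ne[4]; // dimensions, ggml-style: ne[0] = row width, ne[2] = expert count
};

// Returns the imatrix data for `tensor` if present and correctly sized,
// nullptr if absent, and throws on a size mismatch -- except for the token
// embedding, which (per the diff) always triggers the mismatch and is
// therefore only warned about.
static const float * find_imatrix(const MiniTensor & tensor,
        const std::map<std::string, std::vector<float>> & imatrix_data) {
    const auto it = imatrix_data.find(tensor.name);
    if (it == imatrix_data.end()) {
        return nullptr; // no importance data for this tensor
    }
    const size_t expected = (size_t) tensor.ne[0] * tensor.ne[2];
    if (it->second.size() == expected) {
        return it->second.data();
    }
    if (tensor.name != "token_embd.weight") {
        // any other mismatch would silently quantize most of the model
        // without an imatrix, so abort instead
        throw std::runtime_error("imatrix size " + std::to_string(it->second.size()) +
                " is different from tensor size " + std::to_string(expected) +
                " for " + tensor.name);
    }
    std::cerr << "warning: ignoring imatrix size mismatch for " << tensor.name << "\n";
    return nullptr;
}

int main() {
    std::map<std::string, std::vector<float>> imatrix_data;
    imatrix_data["blk.0.ffn_down.weight"] = std::vector<float>(14336 * 8); // 8 experts

    const MiniTensor good = {"blk.0.ffn_down.weight", {14336, 4096, 8, 1}};
    const MiniTensor bad  = {"blk.0.ffn_down.weight", {14336, 4096, 1, 1}}; // split-expert layout

    std::cout << (find_imatrix(good, imatrix_data) ? "ok" : "absent") << "\n";
    try {
        find_imatrix(bad, imatrix_data);
    } catch (const std::exception & e) {
        std::cout << "error: " << e.what() << "\n";
    }
}

The bad tensor models the situation named in the diff's comment: an old Mixtral file with per-expert split tensors (ne[2] == 1) quantized against an imatrix computed for the merged-expert layout, which makes the sizes disagree by a factor of the expert count.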