llama : produce error if imatrix size does not match
This commit is contained in:
parent
fc719b68cf
commit
822caa46a1
1 changed file with 11 additions and 4 deletions
15
llama.cpp
15
llama.cpp
|
@ -13479,6 +13479,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||||
// placeholder for the meta data
|
// placeholder for the meta data
|
||||||
::zeros(fout, meta_size);
|
::zeros(fout, meta_size);
|
||||||
|
|
||||||
|
const auto tn = LLM_TN(model.arch);
|
||||||
|
|
||||||
for (int i = 0; i < ml.n_tensors; ++i) {
|
for (int i = 0; i < ml.n_tensors; ++i) {
|
||||||
struct ggml_tensor * tensor = ml.get_tensor_meta(i);
|
struct ggml_tensor * tensor = ml.get_tensor_meta(i);
|
||||||
|
|
||||||
|
@ -13562,10 +13564,15 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||||
} else {
|
} else {
|
||||||
LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
|
LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
|
||||||
int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name);
|
int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name);
|
||||||
// REVIEW: this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
|
|
||||||
// this is a significant error and it may be a good idea to abort the process if this happens,
|
// this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
|
||||||
// since many people will miss the error and not realize that most of the model is being quantized without an imatrix
|
// this is a significant error and it may be a good idea to abort the process if this happens,
|
||||||
// tok_embd should be ignored in this case, since it always causes this warning
|
// since many people will miss the error and not realize that most of the model is being quantized without an imatrix
|
||||||
|
// tok_embd should be ignored in this case, since it always causes this warning
|
||||||
|
if (name != tn(LLM_TENSOR_TOKEN_EMBD, "weight")) {
|
||||||
|
throw std::runtime_error(format("imatrix size %d is different from tensor size %d for %s",
|
||||||
|
int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue