add review note

This commit is contained in:
slaren 2024-04-03 02:10:43 +02:00
parent a1343aeb8a
commit 19dafafd5f

View file

@@ -13575,7 +13575,11 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 imatrix = it->second.data();
             } else {
                 LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
-                        int(it->second.size()), int(tensor->ne[0]), tensor->name);
+                        int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name);
+                // REVIEW: this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
+                // this is a significant error and it may be good idea to abort the process if this happens,
+                // since many people will miss the error and not realize that most of the model is being quantized without an imatrix
+                // tok_embd should be ignored in this case, since it always causes this warning
             }
         }
     }