diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 83b85d72b..ccbd13f26 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -250,8 +250,7 @@ void IMatrixCollector::save_imatrix(int ncall) const { } if (n_zeros > 0) { - fprintf(stderr, "%s: entry '%40s' has partial data (%.2f%%) - skipping\n", __func__, kv.first.c_str(), 100.0f * (n_all - n_zeros) / n_all); - continue; + fprintf(stderr, "%s: entry '%40s' has partial data (%.2f%%)\n", __func__, kv.first.c_str(), 100.0f * (n_all - n_zeros) / n_all); } n_entries++; @@ -275,7 +274,7 @@ void IMatrixCollector::save_imatrix(int ncall) const { if (nval > 0) { std::vector tmp(nval); for (int i = 0; i < nval; i++) { - tmp[i] = (stat.values[i] / static_cast(stat.counts[i])) * static_cast(stat.ncall); + tmp[i] = stat.counts[i] ? (stat.values[i] / static_cast(stat.counts[i])) * static_cast(stat.ncall) : 0.0f; } out.write((const char*)tmp.data(), nval*sizeof(float)); } @@ -344,8 +343,10 @@ bool IMatrixCollector::load_imatrix(const char * fname) { // Recreate the state as expected by save_imatrix(), and corerct for weighted sum. for (int i = 0; i < nval; i++) { - e.values[i] += tmp[i]; - e.counts[i] += ncall; + if (std::isnormal(tmp[i])) { + e.values[i] += tmp[i]; + e.counts[i] += ncall; + } } e.ncall += ncall; diff --git a/src/llama.cpp b/src/llama.cpp index f50972249..250cecc1d 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -16767,19 +16767,25 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s if (params->only_copy) { ftype = model.ftype; } - const std::unordered_map> * imatrix_data = nullptr; + std::unordered_map> * imatrix_data = nullptr; if (params->imatrix) { - imatrix_data = static_cast>*>(params->imatrix); + imatrix_data = static_cast>*>(params->imatrix); if (imatrix_data) { LLAMA_LOG_INFO("================================ Have weights data with %d entries\n",int(imatrix_data->size())); qs.has_imatrix = true; // check imatrix for nans or infs - for (const auto & kv : *imatrix_data) { - for (float f : kv.second) { - if (!std::isfinite(f)) { - throw std::runtime_error(format("imatrix contains non-finite value %f\n", f)); + for (auto it = imatrix_data->begin(); it != imatrix_data->end();) { + bool remove_entry = false; + + for (float f : it->second) { + if (!std::isnormal(f)) { + LLAMA_LOG_WARN("imatrix entry \"%s\" contains non-normal value %f, skipping!\n", it->first.c_str(), f); + remove_entry = true; + break; } } + + it = remove_entry ? imatrix_data->erase(it) : ++it; } } }