From 0e3673991502ad3c998e78e2e15ae4369f4e0836 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Thu, 13 Jun 2024 01:02:22 +0200 Subject: [PATCH 1/4] save partial imatrix --- examples/imatrix/imatrix.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 574f5ed9c..2e8437328 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -250,8 +250,7 @@ void IMatrixCollector::save_imatrix(int ncall) const { } if (n_zeros > 0) { - fprintf(stderr, "%s: entry '%40s' has partial data (%.2f%%) - skipping\n", __func__, kv.first.c_str(), 100.0f * (n_all - n_zeros) / n_all); - continue; + fprintf(stderr, "%s: entry '%40s' has partial data (%.2f%%)\n", __func__, kv.first.c_str(), 100.0f * (n_all - n_zeros) / n_all); } n_entries++; @@ -275,7 +274,7 @@ void IMatrixCollector::save_imatrix(int ncall) const { if (nval > 0) { std::vector tmp(nval); for (int i = 0; i < nval; i++) { - tmp[i] = (stat.values[i] / static_cast(stat.counts[i])) * static_cast(stat.ncall); + tmp[i] = stat.counts[i] ? (stat.values[i] / static_cast(stat.counts[i])) * static_cast(stat.ncall) : 0.0f; } out.write((const char*)tmp.data(), nval*sizeof(float)); } From 4ad3eb21bf7eb9c76b3b43db3129454a9ef7b5bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Thu, 13 Jun 2024 01:08:44 +0200 Subject: [PATCH 2/4] skip imatrix entries with non-normal data --- llama.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/llama.cpp b/llama.cpp index 8b675ea99..c64099b3f 100644 --- a/llama.cpp +++ b/llama.cpp @@ -15231,19 +15231,25 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s if (params->only_copy) { ftype = model.ftype; } - const std::unordered_map> * imatrix_data = nullptr; + std::unordered_map> * imatrix_data = nullptr; if (params->imatrix) { - imatrix_data = static_cast>*>(params->imatrix); + imatrix_data = static_cast>*>(params->imatrix); if (imatrix_data) { LLAMA_LOG_INFO("================================ Have weights data with %d entries\n",int(imatrix_data->size())); qs.has_imatrix = true; // check imatrix for nans or infs - for (const auto & kv : *imatrix_data) { - for (float f : kv.second) { - if (!std::isfinite(f)) { - throw std::runtime_error(format("imatrix contains non-finite value %f\n", f)); + for (auto it = imatrix_data->begin(); it != imatrix_data->end();) { + bool remove_entry = false; + + for (float f : it->second) { + if (!std::isnormal(f)) { + LLAMA_LOG_WARN("imatrix entry \"%s\" contains non-normal value %f, skipping!\n", it->first.c_str(), f); + remove_entry = true; + break; } } + + it = remove_entry ? imatrix_data->erase(it) : ++it; } } } From fe21ef7920e61a8192c7dda25e3206024c56b0e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Thu, 13 Jun 2024 02:10:17 +0200 Subject: [PATCH 3/4] correct counts on load --- examples/imatrix/imatrix.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 2e8437328..0cff9994d 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -343,8 +343,10 @@ bool IMatrixCollector::load_imatrix(const char * fname) { // Recreate the state as expected by save_imatrix(), and corerct for weighted sum. for (int i = 0; i < nval; i++) { - e.values[i] += tmp[i]; - e.counts[i] += ncall; + if (tmp[i]) { + e.values[i] += tmp[i]; + e.counts[i] += ncall; + } } e.ncall += ncall; From 4c29bb049436ededaff98af26853cc9c794b73f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Thu, 13 Jun 2024 08:16:29 +0200 Subject: [PATCH 4/4] clear out all non-normals on load --- examples/imatrix/imatrix.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 0cff9994d..f8949c189 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -343,7 +343,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) { // Recreate the state as expected by save_imatrix(), and corerct for weighted sum. for (int i = 0; i < nval; i++) { - if (tmp[i]) { + if (std::isnormal(tmp[i])) { e.values[i] += tmp[i]; e.counts[i] += ncall; }