Fixed save_imatrix to match old behaviour for MoE

This fix is simple and clear, but it unnecessarily doubles the memory overhead.
jukofyork 2024-05-06 10:58:35 +01:00 committed by GitHub
parent bcdee0daa7
commit 8ac51cadab


@@ -19,6 +19,7 @@
 struct Stats {
     std::vector<float> values;
+    std::vector<int> counts;
     int ncall = 0;
 };
@@ -120,13 +121,14 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         auto & e = m_stats[wname];
-        ++e.ncall;
-        // NOTE: since we select top-k experts, the number of calls for the expert tensors will be k times larger
-        // using the following line, we can correct for that if needed by replacing the line above with:
-        //if (idx == t->src[0]->ne[0] - 1) ++e.ncall;
+        // We select top-k experts, the number of calls for the expert tensors will be k times larger.
+        // NOTE: This will trigger the "if (e.ncall > m_last_call)" save conditional on the first active expert.
+        // The commented out "if (idx == t->src[0]->ne[0] - 1) ++e.ncall;" doesn't work.
+        if (idx == 0) ++e.ncall;
         if (e.values.empty()) {
             e.values.resize(src1->ne[0]*n_as, 0);
+            e.counts.resize(src1->ne[0]*n_as, 0);
         }
         else if (e.values.size() != (size_t)src1->ne[0]*n_as) {
             fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as);
@@ -153,6 +155,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                 for (int j = 0; j < (int)src1->ne[0]; ++j) {
                     e.values[e_start + j] += x[j]*x[j];
+                    e.counts[e_start + j]++;
                 }
             }
         }
@@ -170,6 +173,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         auto& e = m_stats[wname];
         if (e.values.empty()) {
             e.values.resize(src1->ne[0], 0);
+            e.counts.resize(src1->ne[0], 0);
         }
         else if (e.values.size() != (size_t)src1->ne[0]) {
             fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]);
@@ -183,6 +187,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             const float * x = data + row * src1->ne[0];
             for (int j = 0; j < (int)src1->ne[0]; ++j) {
                 e.values[j] += x[j]*x[j];
+                e.counts[j]++;
             }
         }
         if (e.ncall > m_last_call) {
@@ -222,7 +227,13 @@ void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) co
         out.write((const char *) &p.second.ncall, sizeof(p.second.ncall));
         int nval = p.second.values.size();
         out.write((const char *) &nval, sizeof(nval));
-        if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float));
+        if (nval > 0) {
+            std::vector<float> tmp(nval);
+            for (int i = 0; i < nval; i++) {
+                tmp[i] = (p.second.values[i] / static_cast<float>(p.second.counts[i])) * static_cast<float>(p.second.ncall);
+            }
+            out.write((const char*)tmp.data(), nval*sizeof(float));
+        }
     }
     // Write the number of call the matrix was computed with
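
To make the intent of the change easier to follow outside the diff, here is a minimal standalone sketch of the same counts-based averaging. The names ExpertStats, accumulate and to_saved are illustrative only and do not appear in imatrix.cpp; the divide-by-zero guard is an extra precaution in this sketch, not something the diff adds.

// Sketch of per-element counts for MoE imatrix accumulation (hypothetical names).
#include <cstdio>
#include <vector>

struct ExpertStats {
    std::vector<float> values; // running sum of x[j]*x[j] per element
    std::vector<int>   counts; // how many activations hit each element
    int ncall = 0;             // how many times the tensor was evaluated
};

// Accumulate one row of activations x[0..n) for one expert slot.
void accumulate(ExpertStats & e, const float * x, int n, int expert, int n_expert) {
    if (e.values.empty()) {
        e.values.resize((size_t)n * n_expert, 0.0f);
        e.counts.resize((size_t)n * n_expert, 0);
    }
    const size_t start = (size_t)expert * n;
    for (int j = 0; j < n; ++j) {
        e.values[start + j] += x[j] * x[j];
        e.counts[start + j]++;
    }
}

// Values as they would be written to disk: per-element mean, rescaled by ncall
// so the file keeps the old "sum over ncall calls" convention.
std::vector<float> to_saved(const ExpertStats & e) {
    std::vector<float> out(e.values.size(), 0.0f);
    for (size_t i = 0; i < out.size(); ++i) {
        if (e.counts[i] > 0) {
            out[i] = e.values[i] / (float)e.counts[i] * (float)e.ncall;
        }
    }
    return out;
}

int main() {
    ExpertStats e;
    const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    // Two experts; expert 1 is only active on the first of two calls.
    accumulate(e, x, 4, /*expert=*/0, /*n_expert=*/2);
    accumulate(e, x, 4, /*expert=*/1, /*n_expert=*/2);
    ++e.ncall;
    accumulate(e, x, 4, /*expert=*/0, /*n_expert=*/2);
    ++e.ncall;
    // Both expert rows come out as 2*x[j]*x[j]: the rarely-activated expert is
    // no longer under-weighted relative to the one that fired on every call.
    for (float v : to_saved(e)) printf("%g ", v);
    printf("\n");
    return 0;
}

Keeping counts (one int per float in values) alongside the sums is what the commit description means by roughly doubling the per-tensor memory overhead; the averaging itself only happens once, at save time.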