From 055a0c2e12d6c558638e71b449efe5162ca33dd0 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Wed, 10 Jan 2024 18:25:26 +0200 Subject: [PATCH] imatrix: WIP --- examples/imatrix/imatrix.cpp | 144 ++++++++++++++++++++++++++--------- ggml.c | 2 +- ggml.h | 2 +- 3 files changed, 110 insertions(+), 38 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 5c9147b86..ff4489c4e 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -33,31 +33,25 @@ struct StatParams { bool collect_output_weight = false; }; -static void ik_save_statistics(const char * fname, const std::unordered_map& stats, int ncall) { - std::ofstream out(fname, std::ios::binary); - int n_entries = stats.size(); - out.write((const char*)&n_entries, sizeof(n_entries)); - for (auto& p : stats) { - int len = p.first.size(); - out.write((const char*)&len, sizeof(len)); - out.write(p.first.c_str(), len); - out.write((const char*)&p.second.ncall, sizeof(p.second.ncall)); - int nval = p.second.values.size(); - out.write((const char*)&nval, sizeof(nval)); - if (nval > 0) out.write((const char*)p.second.values.data(), nval*sizeof(float)); - } - fprintf(stderr, "%s: stored collected data after %d calls in %s\n",__func__,ncall,fname); -} +class IMatrixCollector { +public: + IMatrixCollector() = default; + void set_parameters(StatParams&& params) { m_params = std::move(params); } + void collect_imatrix(const struct ggml_tensor * src0, const struct ggml_tensor * src1); + void save_imatrix() const; +private: + std::unordered_map m_stats; + StatParams m_params; + std::mutex m_mutex; + int m_last_call = 0; +}; -static void ik_collect_imatrix(const struct ggml_tensor * src0, const struct ggml_tensor * src1) { - static int last_call = 0; - static std::mutex mutex; +void IMatrixCollector::collect_imatrix(const struct ggml_tensor * src0, const struct ggml_tensor * src1) { if (src1->ne[1] < 16 || src1->type != GGML_TYPE_F32) return; - //if (strncmp(src0->name, "blk.", 4) != 0 && strcmp(src0->name, "output.weight") != 0) return; - if (strncmp(src0->name, "blk.", 4) != 0) return; - std::lock_guard lock(mutex); - auto& g_stats = ik_get_stats(); - auto& e = g_stats[src0->name]; + if (!(strncmp(src0->name, "blk.", 4) == 0 || (m_params.collect_output_weight && strcmp(src0->name, "output.weight") == 0))) return; + //if (strncmp(src0->name, "blk.", 4) != 0) return; + std::lock_guard lock(m_mutex); + auto& e = m_stats[src0->name]; if (e.values.empty()) { e.values.resize(src1->ne[0], 0); } @@ -66,21 +60,89 @@ static void ik_collect_imatrix(const struct ggml_tensor * src0, const struct ggm exit(1); //GGML_ASSERT(false); } ++e.ncall; - printf("%s[%d]: %s, %d x %d, %d\n",__func__,last_call,src0->name,(int)src1->ne[0],(int)src1->ne[1],(int)src1->type); + printf("%s[%d]: %s, %d x %d, %d\n",__func__,m_last_call,src0->name,(int)src1->ne[0],(int)src1->ne[1],(int)src1->type); for (int row = 0; row < (int)src1->ne[1]; ++row) { const float * x = (const float *)src1->data + row * src1->ne[0]; for (int j = 0; j < (int)src1->ne[0]; ++j) { e.values[j] += x[j]*x[j]; } } - if (e.ncall > last_call) { - last_call = e.ncall; - if (last_call % 10 == 0) { - ik_save_statistics("stats.dat", g_stats, last_call); + if (e.ncall > m_last_call) { + m_last_call = e.ncall; + if (m_last_call % m_params.n_output_frequency == 0) { + save_imatrix(); } } } +void IMatrixCollector::save_imatrix() const { + const char * fname = m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str(); + std::ofstream out(fname, std::ios::binary); + int n_entries = m_stats.size(); + out.write((const char*)&n_entries, sizeof(n_entries)); + for (auto& p : m_stats) { + int len = p.first.size(); + out.write((const char*)&len, sizeof(len)); + out.write(p.first.c_str(), len); + out.write((const char*)&p.second.ncall, sizeof(p.second.ncall)); + int nval = p.second.values.size(); + out.write((const char*)&nval, sizeof(nval)); + if (nval > 0) out.write((const char*)p.second.values.data(), nval*sizeof(float)); + } + fprintf(stderr, "%s: stored collected data after %d calls in %s\n",__func__,m_last_call,fname); +} + +static IMatrixCollector g_collector; + +//static void ik_save_statistics(const char * fname, const std::unordered_map& stats, int ncall) { +// std::ofstream out(fname, std::ios::binary); +// int n_entries = stats.size(); +// out.write((const char*)&n_entries, sizeof(n_entries)); +// for (auto& p : stats) { +// int len = p.first.size(); +// out.write((const char*)&len, sizeof(len)); +// out.write(p.first.c_str(), len); +// out.write((const char*)&p.second.ncall, sizeof(p.second.ncall)); +// int nval = p.second.values.size(); +// out.write((const char*)&nval, sizeof(nval)); +// if (nval > 0) out.write((const char*)p.second.values.data(), nval*sizeof(float)); +// } +// fprintf(stderr, "%s: stored collected data after %d calls in %s\n",__func__,ncall,fname); +//} + +static void ik_collect_imatrix(const struct ggml_tensor * src0, const struct ggml_tensor * src1) { + g_collector.collect_imatrix(src0, src1); + //static int last_call = 0; + //static std::mutex mutex; + //if (src1->ne[1] < 16 || src1->type != GGML_TYPE_F32) return; + ////if (strncmp(src0->name, "blk.", 4) != 0 && strcmp(src0->name, "output.weight") != 0) return; + //if (strncmp(src0->name, "blk.", 4) != 0) return; + //std::lock_guard lock(mutex); + //auto& g_stats = ik_get_stats(); + //auto& e = g_stats[src0->name]; + //if (e.values.empty()) { + // e.values.resize(src1->ne[0], 0); + //} + //else if (e.values.size() != (size_t)src1->ne[0]) { + // fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", src0->name, (int)e.values.size(), (int)src1->ne[0]); + // exit(1); //GGML_ASSERT(false); + //} + //++e.ncall; + //printf("%s[%d]: %s, %d x %d, %d\n",__func__,last_call,src0->name,(int)src1->ne[0],(int)src1->ne[1],(int)src1->type); + //for (int row = 0; row < (int)src1->ne[1]; ++row) { + // const float * x = (const float *)src1->data + row * src1->ne[0]; + // for (int j = 0; j < (int)src1->ne[0]; ++j) { + // e.values[j] += x[j]*x[j]; + // } + //} + //if (e.ncall > last_call) { + // last_call = e.ncall; + // if (last_call % 10 == 0) { + // ik_save_statistics("stats.dat", g_stats, last_call); + // } + //} +} + struct results_log_softmax { double log_softmax; @@ -273,7 +335,9 @@ int main(int argc, char ** argv) { StatParams sparams; std::vector args; - for (int iarg = 1; iarg < argc-1; ++iarg) { + args.push_back(argv[0]); + int iarg = 1; + for (; iarg < argc-1; ++iarg) { std::string arg{argv[iarg]}; if (arg == "-o" || arg == "--output-file") { sparams.ofile = argv[++iarg]; @@ -287,6 +351,9 @@ int main(int argc, char ** argv) { args.push_back(argv[iarg]); } } + if (iarg < argc) { + args.push_back(argv[iarg]); + } gpt_params params; params.n_batch = 512; @@ -294,7 +361,10 @@ int main(int argc, char ** argv) { return 1; } - ggml_set_stat_collection(ik_collect_imatrix); + g_collector.set_parameters(std::move(sparams)); + + ggml_set_imatrix_collection(ik_collect_imatrix); + ggml_set_imatrix_collection(ik_collect_imatrix); params.logits_all = true; params.n_batch = std::min(params.n_batch, params.n_ctx); @@ -340,12 +410,14 @@ int main(int argc, char ** argv) { if (!OK) { return 1; } - auto& stats = ik_get_stats(); - int ncall = 0; - for (auto& s : stats) { - ncall = std::max(ncall, s.second.ncall); - } - ik_save_statistics(sparams.ofile.c_str(), stats, ncall); + + g_collector.save_imatrix(); + //auto& stats = ik_get_stats(); + //int ncall = 0; + //for (auto& s : stats) { + // ncall = std::max(ncall, s.second.ncall); + //} + //ik_save_statistics(sparams.ofile.c_str(), stats, ncall); llama_print_timings(ctx); diff --git a/ggml.c b/ggml.c index e71552437..0a8d75888 100644 --- a/ggml.c +++ b/ggml.c @@ -396,7 +396,7 @@ static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * rest ggml_collect_imatrix_t g_imatrix_collect = NULL; -void ggml_set_stat_collection(ggml_collect_imatrix_t imatrix_collect) { +void ggml_set_imatrix_collection(ggml_collect_imatrix_t imatrix_collect) { g_imatrix_collect = imatrix_collect; } diff --git a/ggml.h b/ggml.h index 69e66bfc0..b50b3d17f 100644 --- a/ggml.h +++ b/ggml.h @@ -2077,7 +2077,7 @@ extern "C" { // Importance matrix // typedef void(*ggml_collect_imatrix_t)(const struct ggml_tensor * src0, const struct ggml_tensor * src1); - GGML_API void ggml_set_stat_collection(ggml_collect_imatrix_t imatrix_collect); + GGML_API void ggml_set_imatrix_collection(ggml_collect_imatrix_t imatrix_collect); // // gguf