From 851de160dd3b4b3f48eac94e914aaa861eaa2ddd Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 13 Apr 2024 18:03:10 +0200 Subject: [PATCH] quantize: add imatrix m_last_call as `quantize.imatrix.chunks_count` --- examples/imatrix/imatrix.cpp | 3 +++ examples/quantize/quantize.cpp | 28 +++++++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 18ea8016f..b2d813115 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -210,6 +210,9 @@ void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) co if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float)); } + // Write the number of calls the matrix was computed with + out.write((const char *) &m_last_call, sizeof(m_last_call)); + // Write the dataset name at the end of the file to later on specify it in quantize int n_dataset = strlen(dataset); out.write((const char *) &n_dataset, sizeof(n_dataset)); diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 7d5e1d5bb..dcc1fec39 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -53,7 +53,8 @@ static const std::vector QUANT_OPTIONS = { }; static const char * const LLM_KV_QUANTIZE_IMATRIX_DATASET = "quantize.imatrix.dataset"; -static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.n_entries"; +static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.entries_count"; +static const char * const LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS = "quantize.imatrix.chunks_count"; static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) { std::string ftype_str; @@ -113,7 +114,7 @@ static void usage(const char * executable) { exit(1); } -static void load_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, std::unordered_map> & imatrix_data) 
{ +static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, std::unordered_map> & imatrix_data) { std::ifstream in(imatrix_file.c_str(), std::ios::binary); if (!in) { printf("%s: failed to open %s\n",__func__, imatrix_file.c_str()); @@ -162,7 +163,9 @@ static void load_imatrix(const std::string & imatrix_file, std::string & imatrix } // latest imatrix version contains the dataset filename at the end of the file + int m_last_call = 0; if (in.peek() != EOF) { + in.read((char *)&m_last_call, sizeof(m_last_call)); int dataset_len; in.read((char *)&dataset_len, sizeof(dataset_len)); std::vector dataset_as_vec(dataset_len+1); @@ -171,19 +174,21 @@ static void load_imatrix(const std::string & imatrix_file, std::string & imatrix imatrix_dataset = std::string{dataset_as_vec.data()}; printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str()); } - printf("%s: loaded %d importance matrix entries from %s\n", __func__, int(imatrix_data.size()), imatrix_file.c_str()); + printf("%s: loaded %d importance matrix entries from %s computed on %d chunks\n", __func__, int(imatrix_data.size()), imatrix_file.c_str(), m_last_call); + return m_last_call; } -static void prepare_imatrix(const std::string & imatrix_file, +static int prepare_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, const std::vector & included_weights, const std::vector & excluded_weights, std::unordered_map> & imatrix_data) { + int m_last_call = -1; if (!imatrix_file.empty()) { - load_imatrix(imatrix_file, imatrix_dataset, imatrix_data); + m_last_call = load_imatrix(imatrix_file, imatrix_dataset, imatrix_data); } if (imatrix_data.empty()) { - return; + return m_last_call; } if (!excluded_weights.empty()) { for (auto& name : excluded_weights) { @@ -209,6 +214,7 @@ static void prepare_imatrix(const std::string & imatrix_file, if (!imatrix_data.empty()) { printf("%s: have %d importance matrix entries\n", __func__, int(imatrix_data.size())); } + return 
m_last_call; } static ggml_type parse_ggml_type(const char * arg) { @@ -291,7 +297,7 @@ int main(int argc, char ** argv) { std::string imatrix_dataset; std::unordered_map> imatrix_data; - prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data); + int m_last_call = prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data); if (!imatrix_data.empty()) { params.imatrix = &imatrix_data; if (!imatrix_dataset.empty()) { @@ -309,6 +315,14 @@ int main(int argc, char ** argv) { kvo.int_value = imatrix_data.size(); kv_overrides.emplace_back(std::move(kvo)); } + + if (m_last_call > 0) { + llama_model_kv_override kvo; + std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS); + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; + kvo.int_value = m_last_call; + kv_overrides.emplace_back(std::move(kvo)); + } } if (!kv_overrides.empty()) { kv_overrides.emplace_back();