From c8ab6a3ba356e902b94499baaf7ab0191c3b6afe Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Sun, 8 Sep 2024 10:04:01 -0400 Subject: [PATCH] imatrix : fix conversion problems --- convert_legacy_imatrix_to_gguf.py | 8 ++++++-- examples/imatrix/imatrix.cpp | 2 +- examples/quantize/quantize.cpp | 11 +++++++---- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/convert_legacy_imatrix_to_gguf.py b/convert_legacy_imatrix_to_gguf.py index 939d3695b..bd72655bf 100644 --- a/convert_legacy_imatrix_to_gguf.py +++ b/convert_legacy_imatrix_to_gguf.py @@ -64,10 +64,11 @@ class IMatrixReader: data = self._get(np.float32, nval) assert name not in self.entries, f"duplicated name: {name!r}" - self.entries[name] = IMatrixEntry(data, np.array([ncall * self.chunk_size], dtype=np.float32)) + self.entries[name] = IMatrixEntry(data * np.float32(self.chunk_size), np.array([ncall * self.chunk_size], dtype=np.float32)) self.chunk_count = self._get(np.int32).item() - self.dataset = self._get(np.uint8, self._get(np.int32).item()).tobytes().decode("utf-8") + dataset_len = self._get(np.int32).item() + self.dataset = self._get(np.uint8, dataset_len).tobytes().decode("utf-8") def to_writer(self, outfile: Path) -> IMatrixWriter: writer = IMatrixWriter(path=outfile, arch="") @@ -110,6 +111,9 @@ if __name__ == "__main__": input_file: Path = args.imatrix if input_file.suffix != ".gguf": args.outfile = input_file.with_suffix(".gguf") + if args.outfile.exists(): + logger.error(f"default file exists, specify with --outfile to overwrite: {args.outfile}") + exit(1) writer = IMatrixReader(args.imatrix).to_writer(args.outfile) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 2314a035d..fea97918a 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -31,7 +31,7 @@ static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count"; static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size"; struct Stats { - std::vector values; + std::vector values; std::vector counts; }; diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 2df073d45..4f7003194 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -132,6 +132,7 @@ static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_ }; struct gguf_context * ctx_gguf = gguf_init_from_file(imatrix_file.c_str(), meta_gguf_params); if (!ctx_gguf) { + fprintf(stderr, "%s: if this is an older imatrix file, make sure to convert it to the GGUF-based imatrix format\n", __func__); exit(1); } const int32_t n_entries = gguf_get_n_tensors(ctx_gguf); @@ -189,9 +190,9 @@ static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_ e.resize(ggml_nelements(sums)); float max_count = 0.0f; for (int64_t j = 0; j < ne1; ++j) { - const float count = ((const float *) counts->data)[ne1]; + const float count = ((const float *) counts->data)[j]; for (int64_t i = 0; i < ne0; ++i) { - e[ne1*ne0 + ne0] = ((const float *) sums->data)[ne1*ne0 + ne0] / count; + e[j*ne0 + i] = ((const float *) sums->data)[j*ne0 + i] / count; } if (count > max_count) { max_count = count; @@ -201,14 +202,16 @@ static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_ printf("%s: loaded data (size = %6d, ncall = %6d) for '%s'\n", __func__, int(e.size()), int(max_count / chunk_size), name.c_str()); } } - gguf_free(ctx_gguf); - ggml_free(ctx); int m_last_chunk = gguf_get_val_u32(ctx_gguf, chunk_count_idx); imatrix_dataset = gguf_get_val_str(ctx_gguf, dataset_idx); printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str()); printf("%s: loaded %d importance matrix entries from %s computed on %d chunks\n", __func__, int(imatrix_data.size()), imatrix_file.c_str(), m_last_chunk); + + gguf_free(ctx_gguf); + ggml_free(ctx); + return m_last_chunk; }