imatrix : fix conversion problems
This commit is contained in:
parent
3de9300c37
commit
c8ab6a3ba3
3 changed files with 14 additions and 7 deletions
|
@ -64,10 +64,11 @@ class IMatrixReader:
|
||||||
data = self._get(np.float32, nval)
|
data = self._get(np.float32, nval)
|
||||||
assert name not in self.entries, f"duplicated name: {name!r}"
|
assert name not in self.entries, f"duplicated name: {name!r}"
|
||||||
|
|
||||||
self.entries[name] = IMatrixEntry(data, np.array([ncall * self.chunk_size], dtype=np.float32))
|
self.entries[name] = IMatrixEntry(data * np.float32(self.chunk_size), np.array([ncall * self.chunk_size], dtype=np.float32))
|
||||||
|
|
||||||
self.chunk_count = self._get(np.int32).item()
|
self.chunk_count = self._get(np.int32).item()
|
||||||
self.dataset = self._get(np.uint8, self._get(np.int32).item()).tobytes().decode("utf-8")
|
dataset_len = self._get(np.int32).item()
|
||||||
|
self.dataset = self._get(np.uint8, dataset_len).tobytes().decode("utf-8")
|
||||||
|
|
||||||
def to_writer(self, outfile: Path) -> IMatrixWriter:
|
def to_writer(self, outfile: Path) -> IMatrixWriter:
|
||||||
writer = IMatrixWriter(path=outfile, arch="")
|
writer = IMatrixWriter(path=outfile, arch="")
|
||||||
|
@ -110,6 +111,9 @@ if __name__ == "__main__":
|
||||||
input_file: Path = args.imatrix
|
input_file: Path = args.imatrix
|
||||||
if input_file.suffix != ".gguf":
|
if input_file.suffix != ".gguf":
|
||||||
args.outfile = input_file.with_suffix(".gguf")
|
args.outfile = input_file.with_suffix(".gguf")
|
||||||
|
if args.outfile.exists():
|
||||||
|
logger.error(f"default file exists, specify with --outfile to overwrite: {args.outfile}")
|
||||||
|
exit(1)
|
||||||
|
|
||||||
writer = IMatrixReader(args.imatrix).to_writer(args.outfile)
|
writer = IMatrixReader(args.imatrix).to_writer(args.outfile)
|
||||||
|
|
||||||
|
|
|
@ -31,7 +31,7 @@ static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count";
|
||||||
static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size";
|
static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size";
|
||||||
|
|
||||||
struct Stats {
|
struct Stats {
|
||||||
std::vector<double> values;
|
std::vector<float> values;
|
||||||
std::vector<int64_t> counts;
|
std::vector<int64_t> counts;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -132,6 +132,7 @@ static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_
|
||||||
};
|
};
|
||||||
struct gguf_context * ctx_gguf = gguf_init_from_file(imatrix_file.c_str(), meta_gguf_params);
|
struct gguf_context * ctx_gguf = gguf_init_from_file(imatrix_file.c_str(), meta_gguf_params);
|
||||||
if (!ctx_gguf) {
|
if (!ctx_gguf) {
|
||||||
|
fprintf(stderr, "%s: if this is an older imatrix file, make sure to convert it to the GGUF-based imatrix format\n", __func__);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
const int32_t n_entries = gguf_get_n_tensors(ctx_gguf);
|
const int32_t n_entries = gguf_get_n_tensors(ctx_gguf);
|
||||||
|
@ -189,9 +190,9 @@ static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_
|
||||||
e.resize(ggml_nelements(sums));
|
e.resize(ggml_nelements(sums));
|
||||||
float max_count = 0.0f;
|
float max_count = 0.0f;
|
||||||
for (int64_t j = 0; j < ne1; ++j) {
|
for (int64_t j = 0; j < ne1; ++j) {
|
||||||
const float count = ((const float *) counts->data)[ne1];
|
const float count = ((const float *) counts->data)[j];
|
||||||
for (int64_t i = 0; i < ne0; ++i) {
|
for (int64_t i = 0; i < ne0; ++i) {
|
||||||
e[ne1*ne0 + ne0] = ((const float *) sums->data)[ne1*ne0 + ne0] / count;
|
e[j*ne0 + i] = ((const float *) sums->data)[j*ne0 + i] / count;
|
||||||
}
|
}
|
||||||
if (count > max_count) {
|
if (count > max_count) {
|
||||||
max_count = count;
|
max_count = count;
|
||||||
|
@ -201,14 +202,16 @@ static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_
|
||||||
printf("%s: loaded data (size = %6d, ncall = %6d) for '%s'\n", __func__, int(e.size()), int(max_count / chunk_size), name.c_str());
|
printf("%s: loaded data (size = %6d, ncall = %6d) for '%s'\n", __func__, int(e.size()), int(max_count / chunk_size), name.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gguf_free(ctx_gguf);
|
|
||||||
ggml_free(ctx);
|
|
||||||
|
|
||||||
int m_last_chunk = gguf_get_val_u32(ctx_gguf, chunk_count_idx);
|
int m_last_chunk = gguf_get_val_u32(ctx_gguf, chunk_count_idx);
|
||||||
imatrix_dataset = gguf_get_val_str(ctx_gguf, dataset_idx);
|
imatrix_dataset = gguf_get_val_str(ctx_gguf, dataset_idx);
|
||||||
|
|
||||||
printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str());
|
printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str());
|
||||||
printf("%s: loaded %d importance matrix entries from %s computed on %d chunks\n", __func__, int(imatrix_data.size()), imatrix_file.c_str(), m_last_chunk);
|
printf("%s: loaded %d importance matrix entries from %s computed on %d chunks\n", __func__, int(imatrix_data.size()), imatrix_file.c_str(), m_last_chunk);
|
||||||
|
|
||||||
|
gguf_free(ctx_gguf);
|
||||||
|
ggml_free(ctx);
|
||||||
|
|
||||||
return m_last_chunk;
|
return m_last_chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue