From c9b2f7f1bf15c52174eea98dd7a72e7ed706fda6 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 15 Aug 2023 16:01:38 +0300 Subject: [PATCH] gguf : fixes + simplify example + add ggml_nbytes_pad() --- examples/gguf/gguf.cpp | 93 ++---------------------------------------- ggml.c | 16 ++++++-- ggml.h | 1 + 3 files changed, 17 insertions(+), 93 deletions(-) diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp index fe5fe1b4d..fe22ab729 100644 --- a/examples/gguf/gguf.cpp +++ b/examples/gguf/gguf.cpp @@ -21,91 +21,6 @@ static std::string to_string(const T & val) { return ss.str(); } -void gguf_ex_write_str(std::ofstream & fout, const std::string & val) { - const int32_t n = val.size(); - fout.write((const char *) &n, sizeof(n)); - fout.write(val.c_str(), n); -} - -void gguf_ex_write_i32(std::ofstream & fout, int32_t val) { - fout.write((const char *) &val, sizeof(val)); -} - -void gguf_ex_write_u64(std::ofstream & fout, size_t val) { - fout.write((const char *) &val, sizeof(val)); -} - -template -void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) { - gguf_ex_write_str(fout, key); - fout.write((const char *) &type, sizeof(type)); - fout.write((const char *) &val, sizeof(val)); - - fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), to_string(val).c_str()); -} - -template<> -void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) { - gguf_ex_write_str(fout, key); - fout.write((const char *) &type, sizeof(type)); - - const int32_t n = val.size(); - fout.write((const char *) &n, sizeof(n)); - fout.write(val.c_str(), n); - - fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str()); -} - -template -void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector & val) { - gguf_ex_write_str(fout, key); - { - const enum gguf_type tarr = GGUF_TYPE_ARRAY; - fout.write((const char *) &tarr, sizeof(tarr)); - } - - const int32_t n = val.size(); - fout.write((const char *) &type, sizeof(type)); - fout.write((const char *) &n, sizeof(n)); - fout.write((const char *) val.data(), n * sizeof(T)); - - fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str()); - for (int i = 0; i < n; ++i) { - fprintf(stdout, "%s", to_string(val[i]).c_str()); - if (i < n - 1) { - fprintf(stdout, ", "); - } - } - fprintf(stdout, "]\n"); -} - -template<> -void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector & val) { - gguf_ex_write_str(fout, key); - { - const enum gguf_type tarr = GGUF_TYPE_ARRAY; - fout.write((const char *) &tarr, sizeof(tarr)); - } - - const int32_t n = val.size(); - fout.write((const char *) &type, sizeof(type)); - fout.write((const char *) &n, sizeof(n)); - for (int i = 0; i < n; ++i) { - const int32_t nstr = val[i].size(); - fout.write((const char *) &nstr, sizeof(nstr)); - fout.write(val[i].c_str(), nstr); - } - - fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str()); - for (int i = 0; i < n; ++i) { - fprintf(stdout, "%s", val[i].c_str()); - if (i < n - 1) { - fprintf(stdout, ", "); - } - } - fprintf(stdout, "]\n"); -} - bool gguf_ex_write(const std::string & fname) { struct gguf_context * ctx = gguf_init_empty(); @@ -118,11 +33,11 @@ bool gguf_ex_write(const std::string & fname) { gguf_set_val_i32 (ctx, "some.parameter.int32", -0x12345679); gguf_set_val_f32 (ctx, "some.parameter.float32", 0.123456789f); gguf_set_val_bool(ctx, "some.parameter.bool", true); - gguf_set_val_str (ctx, "some.parameter.string", "hello world"); + gguf_set_val_str (ctx, "some.parameter.string", "hello world"); - //gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector{ 1, 2, 3, 4, }.data(), 4); - //gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector{ 3.145f, 2.718f, 1.414f, }.data(), 3); - //gguf_ex_write_arr(fout, "some.parameter.arr.str", GGUF_TYPE_STRING, { "hello", "world", "!" }); + gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector{ 1, 2, 3, 4, }.data(), 4); + gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector{ 3.145f, 2.718f, 1.414f, }.data(), 3); + gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector{ "hello", "world", "!" }.data(), 3); } struct ggml_init_params params = { diff --git a/ggml.c b/ggml.c index 754ceda25..c69a183e8 100644 --- a/ggml.c +++ b/ggml.c @@ -213,10 +213,10 @@ inline static void * ggml_aligned_malloc(size_t size) { error_desc = "insufficient memory"; break; } - GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", - __func__, error_desc, size/(1024.0*1024.0)); + GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); return NULL; } + return aligned_memory; } #define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size) @@ -4109,7 +4109,11 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) { // // is enough, but just in case, adding the second part - return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN); + return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]); +} + +size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) { + return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN); } size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) { @@ -19271,6 +19275,10 @@ void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tenso ctx->infos[idx].name.n = strlen(tensor->name) + 1; ctx->infos[idx].name.data = strdup(tensor->name); + for (int i = 0; i < GGML_MAX_DIMS; ++i) { + ctx->infos[idx].ne[i] = 1; + } + ctx->infos[idx].n_dims = tensor->n_dims; for (int i = 0; i < tensor->n_dims; i++) { ctx->infos[idx].ne[i] = tensor->ne[i]; @@ -19305,8 +19313,8 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname) { // write header gguf_fwrite_el(file, &ctx->header.magic, sizeof(ctx->header.magic)); gguf_fwrite_el(file, &ctx->header.version, sizeof(ctx->header.version)); - gguf_fwrite_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv)); gguf_fwrite_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors)); + gguf_fwrite_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv)); // write key-value pairs for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { diff --git a/ggml.h b/ggml.h index d19574ee9..5984d433d 100644 --- a/ggml.h +++ b/ggml.h @@ -566,6 +566,7 @@ extern "C" { GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor); GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor); GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor); + GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split); GGML_API int ggml_blck_size (enum ggml_type type);