gguf : fixes + simplify example + add ggml_nbytes_pad()

2023-08-15 16:01:38 +03:00 · 2023-08-15 16:01:38 +03:00 · c9b2f7f1bf
commit c9b2f7f1bf
parent 4463965401
3 changed files with 17 additions and 93 deletions
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@ -21,91 +21,6 @@ static std::string to_string(const T & val) {
    return ss.str();
 }
 // Emit a length-prefixed string: first the byte count as a 32-bit signed
 // integer (host byte order), then the characters themselves. No terminating
 // NUL is written.
 void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
    const int32_t len = static_cast<int32_t>(val.size());

    fout.write(reinterpret_cast<const char *>(&len), sizeof(len));
    fout.write(val.data(), len);
 }
 // Emit the raw in-memory bytes of a 32-bit signed integer (host byte order).
 void gguf_ex_write_i32(std::ofstream & fout, int32_t val) {
    const char * raw = reinterpret_cast<const char *>(&val);
    fout.write(raw, sizeof(int32_t));
 }
 // Emit an unsigned 64-bit integer (host byte order).
 //
 // The parameter is a size_t, whose width is platform dependent (4 bytes on
 // typical 32-bit targets). The value is widened to uint64_t first so that
 // exactly 8 bytes are always written, as the function name promises.
 void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
    const uint64_t val64 = static_cast<uint64_t>(val);
    fout.write(reinterpret_cast<const char *>(&val64), sizeof(val64));
 }
 // Emit one key/value record for a value of type T:
 //   1. the key, via gguf_ex_write_str()
 //   2. the raw bytes of the gguf_type tag
 //   3. the raw bytes of the value itself (sizeof(T) bytes)
 // A human-readable trace of the written parameter is printed to stdout.
 template<typename T>
 void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
    gguf_ex_write_str(fout, key);

    fout.write(reinterpret_cast<const char *>(&type), sizeof(type));
    fout.write(reinterpret_cast<const char *>(&val),  sizeof(val));

    const std::string shown = to_string(val);
    printf("%s: write param: %s = %s\n", __func__, key.c_str(), shown.c_str());
 }
 // String specialization of gguf_ex_write_val(): the value cannot be dumped as
 // raw object bytes, so after the key and the type tag it is emitted in the
 // same length-prefixed form that gguf_ex_write_str() produces (32-bit length
 // followed by the characters). The written parameter is traced to stdout.
 template<>
 void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
    gguf_ex_write_str(fout, key);
    fout.write(reinterpret_cast<const char *>(&type), sizeof(type));

    // payload: int32 length + bytes
    gguf_ex_write_str(fout, val);

    printf("%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str());
 }
 // Emit one key/array record whose elements are of type T:
 //   1. the key, via gguf_ex_write_str()
 //   2. the GGUF_TYPE_ARRAY tag
 //   3. the element type tag
 //   4. the element count as a 32-bit signed integer
 //   5. the elements as one contiguous run of count * sizeof(T) bytes
 // The array contents are also traced to stdout as "[a, b, c]".
 template<typename T>
 void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
    const enum gguf_type arr_tag = GGUF_TYPE_ARRAY;
    const int32_t        count   = val.size();

    gguf_ex_write_str(fout, key);
    fout.write(reinterpret_cast<const char *>(&arr_tag), sizeof(arr_tag));
    fout.write(reinterpret_cast<const char *>(&type),    sizeof(type));
    fout.write(reinterpret_cast<const char *>(&count),   sizeof(count));
    fout.write(reinterpret_cast<const char *>(val.data()), count * sizeof(T));

    printf("%s: write param: %s = [", __func__, key.c_str());
    for (int idx = 0; idx < count; ++idx) {
        // separator goes in front of every element except the first
        if (idx > 0) {
            printf(", ");
        }
        printf("%s", to_string(val[idx]).c_str());
    }
    printf("]\n");
 }
 // String specialization of gguf_ex_write_arr(): the header (key, array tag,
 // element type tag, 32-bit element count) is the same as in the generic
 // version, but each element is emitted individually in length-prefixed form
 // (32-bit length followed by the characters) instead of as one raw byte run.
 // The array contents are also traced to stdout as "[a, b, c]".
 template<>
 void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
    const enum gguf_type arr_tag = GGUF_TYPE_ARRAY;
    const int32_t        count   = val.size();

    gguf_ex_write_str(fout, key);
    fout.write(reinterpret_cast<const char *>(&arr_tag), sizeof(arr_tag));
    fout.write(reinterpret_cast<const char *>(&type),    sizeof(type));
    fout.write(reinterpret_cast<const char *>(&count),   sizeof(count));

    // each element: int32 length + bytes
    for (int idx = 0; idx < count; ++idx) {
        gguf_ex_write_str(fout, val[idx]);
    }

    printf("%s: write param: %s = [", __func__, key.c_str());
    for (int idx = 0; idx < count; ++idx) {
        // separator goes in front of every element except the first
        if (idx > 0) {
            printf(", ");
        }
        printf("%s", val[idx].c_str());
    }
    printf("]\n");
 }
 bool gguf_ex_write(const std::string & fname) {
    struct gguf_context * ctx = gguf_init_empty();
@ -120,9 +35,9 @@ bool gguf_ex_write(const std::string & fname) {
        gguf_set_val_bool(ctx, "some.parameter.bool",     true);
        gguf_set_val_str (ctx, "some.parameter.string",   "hello world");
-        //gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4);
+        gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16,   std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4);
-        //gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3);
+        gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3);
-        //gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING,  { "hello", "world", "!" });
+        gguf_set_arr_str (ctx, "some.parameter.arr.str",                    std::vector<const char *>{ "hello", "world", "!" }.data(), 3);
    }
    struct ggml_init_params params = {
--- a/ggml.c
+++ b/ggml.c
@ -213,10 +213,10 @@ inline static void * ggml_aligned_malloc(size_t size) {
                error_desc = "insufficient memory";
                break;
        }
-        GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n",
+        GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
            __func__, error_desc, size/(1024.0*1024.0));
        return NULL;
    }
    return aligned_memory;
 }
 #define GGML_ALIGNED_MALLOC(size)  ggml_aligned_malloc(size)
@ -4109,7 +4109,11 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
    //
    // is enough, but just in case, adding the second part
-    return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN);
+    return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]);
 }
 // Byte size of the tensor as reported by ggml_nbytes(), rounded with
 // GGML_PAD to GGML_MEM_ALIGN.
 size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
    const size_t nbytes = ggml_nbytes(tensor);
    return GGML_PAD(nbytes, GGML_MEM_ALIGN);
 }
 size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
@ -19271,6 +19275,10 @@ void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tenso
    ctx->infos[idx].name.n    = strlen(tensor->name) + 1;
    ctx->infos[idx].name.data = strdup(tensor->name);
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
        ctx->infos[idx].ne[i] = 1;
    }
    ctx->infos[idx].n_dims = tensor->n_dims;
    for (int i = 0; i < tensor->n_dims; i++) {
        ctx->infos[idx].ne[i] = tensor->ne[i];
@ -19305,8 +19313,8 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname) {
    // write header
    gguf_fwrite_el(file, &ctx->header.magic,     sizeof(ctx->header.magic));
    gguf_fwrite_el(file, &ctx->header.version,   sizeof(ctx->header.version));
    gguf_fwrite_el(file, &ctx->header.n_kv,      sizeof(ctx->header.n_kv));
    gguf_fwrite_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
    gguf_fwrite_el(file, &ctx->header.n_kv,      sizeof(ctx->header.n_kv));
    // write key-value pairs
    for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
--- a/ggml.h
+++ b/ggml.h
@ -566,6 +566,7 @@ extern "C" {
    GGML_API int64_t ggml_nelements   (const struct ggml_tensor * tensor);
    GGML_API int64_t ggml_nrows       (const struct ggml_tensor * tensor);
    GGML_API size_t  ggml_nbytes      (const struct ggml_tensor * tensor);
    GGML_API size_t  ggml_nbytes_pad  (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
    GGML_API size_t  ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
    GGML_API int     ggml_blck_size (enum ggml_type type);