mike dupont 2023-11-22 09:04:00 -05:00
parent 6f8adf99d5
commit 6fd690fae7
12 changed files with 116 additions and 97 deletions
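The change is mechanical: the comment-style field annotations used throughout ggml (`/*.mem_size =*/ value`) become real C99/C++20 designated initializers (`.mem_size = value`), and the structs in ggml.h are given refl-cpp attribute bases. A minimal sketch of the pattern, using the ggml_init_params fields that appear in the first hunk below:

    // before: positional initialization; the field names are only comments,
    // so reordering the struct silently breaks every call site
    struct ggml_init_params params = {
        /*.mem_size   =*/ compute_size,
        /*.mem_buffer =*/ compute_addr,
        /*.no_alloc   =*/ false,
    };

    // after: designated initializers; the compiler checks each field name
    struct ggml_init_params params = {
        .mem_size   = compute_size,
        .mem_buffer = compute_addr,
        .no_alloc   = false,
    };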


@@ -1528,9 +1528,9 @@ int main(int argc, char ** argv) {
     for (int ex=0; ex<n_examples; ++ex) {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ compute_size,
-            /*.mem_buffer =*/ compute_addr,
-            /*.no_alloc   =*/ false,
+            .mem_size   = compute_size,
+            .mem_buffer = compute_addr,
+            .no_alloc   = false,
         };
         struct ggml_context * ctx0 = ggml_init(params);
@@ -1603,9 +1603,9 @@ int main(int argc, char ** argv) {
     printf("---\n");
     for (int i=0; i<n_gen; ++i) {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ compute_size,
-            /*.mem_buffer =*/ compute_addr,
-            /*.no_alloc   =*/ false,
+            .mem_size   = compute_size,
+            .mem_buffer = compute_addr,
+            .no_alloc   = false,
         };
         struct ggml_context * ctx0 = ggml_init(params);


@@ -141,9 +141,9 @@ int main(int argc, char ** argv) {
     printf("Allocating Memory of size %zi bytes, %zi MB\n", ctx_size, (ctx_size/1024/1024));
     struct ggml_init_params params = {
-        /*.mem_size   =*/ ctx_size,
-        /*.mem_buffer =*/ NULL,
-        /* no_alloc   =*/ 0
+        .mem_size   = ctx_size,
+        .mem_buffer = NULL,
+        .no_alloc   = 0
    };
    ctx = ggml_init(params);


@@ -554,8 +554,8 @@ static void load_vocab(const char *filename, Config *config, struct llama_vocab
     struct ggml_context * ctx_data = NULL;
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        .no_alloc = false,
+        .ctx      = &ctx_data,
     };
     struct gguf_context * ctx = gguf_init_from_file(filename, params);


@@ -295,8 +295,8 @@ static void init_model(struct llama_model * input, struct my_llama_model * model
     // get parameters directly from gguf file
     {
         struct gguf_init_params params = {
-            /*.no_alloc = */ false,
-            /*.ctx      = */ NULL,
+            .no_alloc = false,
+            .ctx      = NULL,
         };
         struct gguf_context * mctx = gguf_init_from_file(fn_model, params);
@@ -1709,9 +1709,9 @@ int main(int argc, char ** argv) {
     // context for input tensors without their data
     struct ggml_init_params ctx_input_params = {
-        ggml_tensor_overhead() * 2, // mem_size
-        NULL,                       // mem_buffer
-        true,                       // no_alloc
+        .mem_size   = ggml_tensor_overhead() * 2, // mem_size
+        .mem_buffer = NULL,                       // mem_buffer
+        .no_alloc   = true,                       // no_alloc
     };
     struct ggml_context * ctx_input = ggml_init(ctx_input_params);
@@ -1738,9 +1738,9 @@ int main(int argc, char ** argv) {
         (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
     );
     struct ggml_init_params ctx_compute_params = {
-        estimated_compute_size_wo_data, // mem_size
-        NULL,                           // mem_buffer
-        true,                           // no_alloc
+        .mem_size   = estimated_compute_size_wo_data, // mem_size
+        .mem_buffer = NULL,                           // mem_buffer
+        .no_alloc   = true,                           // no_alloc
     };
     struct ggml_context * ctx_compute = NULL;
@@ -1905,9 +1905,9 @@ int main(int argc, char ** argv) {
     // context for work buffer
     struct ggml_init_params ctx_work_params = {
-        max_work_size, // mem_size
-        NULL,          // mem_buffer
-        false,         // no_alloc
+        .mem_size   = max_work_size, // mem_size
+        .mem_buffer = NULL,          // mem_buffer
+        .no_alloc   = false,         // no_alloc
     };
     struct ggml_context * ctx_work = ggml_init(ctx_work_params);


@@ -41,9 +41,9 @@ static bool gguf_ex_write(const std::string & fname) {
     gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);
     struct ggml_init_params params = {
-        /*.mem_size   =*/ 128ull*1024ull*1024ull,
-        /*.mem_buffer =*/ NULL,
-        /*.no_alloc   =*/ false,
+        .mem_size   = 128ull*1024ull*1024ull,
+        .mem_buffer = NULL,
+        .no_alloc   = false,
     };
     struct ggml_context * ctx_data = ggml_init(params);
@@ -87,8 +87,8 @@ static bool gguf_ex_write(const std::string & fname) {
 // just read tensor info
 static bool gguf_ex_read_0(const std::string & fname) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ NULL,
+        .no_alloc = false,
+        .ctx      = NULL,
     };
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
@@ -147,8 +147,8 @@ static bool gguf_ex_read_1(const std::string & fname) {
     struct ggml_context * ctx_data = NULL;
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        .no_alloc = false,
+        .ctx      = &ctx_data,
     };
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);


@@ -256,9 +256,9 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima
     const auto & buf_compute = ctx->buf_compute;
     struct ggml_init_params params = {
-        /*.mem_size   =*/ buf_compute.size,
-        /*.mem_buffer =*/ buf_compute.data,
-        /*.no_alloc   =*/ false,
+        .mem_size   = buf_compute.size,
+        .mem_buffer = buf_compute.data,
+        .no_alloc   = false,
     };
     params.no_alloc = true;
@@ -456,8 +456,8 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     struct ggml_context * meta = NULL;
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ &meta,
+        .no_alloc = true,
+        .ctx      = &meta,
     };
     struct gguf_context * ctx = gguf_init_from_file(fname, params);
@@ -553,9 +553,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     // load tensors
     {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ ctx_size,
-            /*.mem_buffer =*/ NULL,
-            /*.no_alloc   =*/ false,
+            .mem_size   = ctx_size,
+            .mem_buffer = NULL,
+            .no_alloc   = false,
         };
         new_clip->ctx = ggml_init(params);


@@ -601,8 +601,8 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo
     // set vocab by copying from vocab_model gguf file
     {
         struct gguf_init_params params = {
-            /*.no_alloc = */ false,
-            /*.ctx      = */ NULL,
+            .no_alloc = false,
+            .ctx      = NULL,
         };
         struct gguf_context * vctx = gguf_init_from_file(fn_vocab_model, params);
@@ -1086,9 +1086,9 @@ int main(int argc, char ** argv) {
     // context for input tensors without their data
     struct ggml_init_params ctx_input_params = {
-        ggml_tensor_overhead() * 2, // mem_size
-        NULL,                       // mem_buffer
-        true,                       // no_alloc
+        .mem_size   = ggml_tensor_overhead() * 2, // mem_size
+        .mem_buffer = NULL,                       // mem_buffer
+        .no_alloc   = true,                       // no_alloc
     };
     struct ggml_context * ctx_input = ggml_init(ctx_input_params);
@@ -1115,9 +1115,9 @@ int main(int argc, char ** argv) {
         (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
     );
     struct ggml_init_params ctx_compute_params = {
-        estimated_compute_size_wo_data, // mem_size
-        NULL,                           // mem_buffer
-        true,                           // no_alloc
+        .mem_size   = estimated_compute_size_wo_data, // mem_size
+        .mem_buffer = NULL,                           // mem_buffer
+        .no_alloc   = true,                           // no_alloc
     };
     struct ggml_context * ctx_compute = NULL;
@@ -1268,9 +1268,9 @@ int main(int argc, char ** argv) {
     // context for work buffer
     struct ggml_init_params ctx_work_params = {
-        max_work_size, // mem_size
-        NULL,          // mem_buffer
-        false,         // no_alloc
+        .mem_size   = max_work_size,
+        .mem_buffer = NULL,
+        .no_alloc   = false,
     };
     struct ggml_context * ctx_work = ggml_init(ctx_work_params);


@@ -351,15 +351,17 @@ struct ggml_gallocr {
 ggml_gallocr_t ggml_gallocr_new(void) {
     ggml_gallocr_t galloc = (ggml_gallocr_t)malloc(sizeof(struct ggml_gallocr));
+    ggml_hash_set hs = { .size = 0, .keys = NULL };
     *galloc = (struct ggml_gallocr) {
-        /*.talloc           = */ NULL,
-        /*.hash_set         = */ {0},
-        /*.hash_values      = */ NULL,
-        /*.hash_values_size = */ 0,
-        /*.hash_allocs      = */ NULL,
-        /*.parse_seq        = */ NULL,
-        /*.parse_seq_len    = */ 0,
+        .talloc           = NULL,
+        .hash_set         = hs,
+        .hash_values      = NULL,
+        .hash_values_size = 0,
+        .hash_allocs      = NULL,
+        .parse_seq        = NULL,
+        .parse_seq_len    = 0,
     };
+    //((*galloc).hash_set)[0] = 0;
     return galloc;
 }
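One non-mechanical spot in the hunk above: hash_set is itself a struct, so the old positional {0} cannot simply gain a field name. The commit routes it through a named temporary hs, presumably to keep the conversion simple; C99 and C++20 would also accept nesting the designated list inline. A sketch under that assumption (remaining fields as in the hunk):

    *galloc = (struct ggml_gallocr) {
        .talloc   = NULL,
        .hash_set = { .size = 0, .keys = NULL }, // inline alternative to the hs temporary
        // ... .hash_values through .parse_seq_len as above ...
    };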
@@ -706,8 +708,8 @@ struct ggml_allocr {
 static ggml_allocr_t ggml_allocr_new_impl(ggml_tallocr_t talloc) {
     ggml_allocr_t alloc = (ggml_allocr_t)malloc(sizeof(struct ggml_allocr));
     *alloc = (struct ggml_allocr) {
-        /*.talloc = */ talloc,
-        /*.galloc = */ ggml_gallocr_new(),
+        .talloc = talloc,
+        .galloc = ggml_gallocr_new(),
     };
     return alloc;
 }


@@ -587,9 +587,9 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g
     sched->n_splits = 0;
     struct ggml_init_params params = {
-        /*.mem_size   = */ sizeof(sched->context_buffer),
-        /*.mem_buffer = */ sched->context_buffer,
-        /*.no_alloc   = */ true
+        .mem_size   = sizeof(sched->context_buffer),
+        .mem_buffer = sched->context_buffer,
+        .no_alloc   = true
     };
     if (sched->ctx != NULL) {


@@ -2,6 +2,8 @@
+//https://github.com/Neargye/magic_enum.git
+#include <magic_enum.hpp>
 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #define _USE_MATH_DEFINES // For M_PI on MSVC
@@ -16136,11 +16138,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         // all other threads are finished and spinning
         // do finalize and init here so we don't have synchronize again
         struct ggml_compute_params params = {
-            /*.type  =*/ GGML_TASK_FINALIZE,
-            /*.ith   =*/ 0,
-            /*.nth   =*/ 0,
-            /*.wsize =*/ cplan->work_size,
-            /*.wdata =*/ cplan->work_data,
+            .type  = GGML_TASK_FINALIZE,
+            .ith   = 0,
+            .nth   = 0,
+            .wsize = cplan->work_size,
+            .wdata = cplan->work_data,
         };
         if (node_n != -1) {
@@ -16219,11 +16221,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         const int n_tasks = ggml_get_n_tasks(node, n_threads);
         struct ggml_compute_params params = {
-            /*.type  =*/ GGML_TASK_COMPUTE,
-            /*.ith   =*/ state->ith,
-            /*.nth   =*/ n_tasks,
-            /*.wsize =*/ cplan->work_size,
-            /*.wdata =*/ cplan->work_data,
+            .type  = GGML_TASK_COMPUTE,
+            .ith   = state->ith,
+            .nth   = n_tasks,
+            .wsize = cplan->work_size,
+            .wdata = cplan->work_data,
         };
         if (state->ith < n_tasks) {
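The include added at the top of this file pulls in magic_enum, a header-only enum-reflection library; enum_name is its documented entry point. A self-contained sketch of what it provides for code like the hunks above (the enum here is a local stand-in, not ggml's own definition):

    #include <magic_enum.hpp>
    #include <iostream>

    enum class task_type { GGML_TASK_INIT, GGML_TASK_COMPUTE, GGML_TASK_FINALIZE };

    int main() {
        // prints "GGML_TASK_COMPUTE" without a hand-written switch
        std::cout << magic_enum::enum_name(task_type::GGML_TASK_COMPUTE) << "\n";
        return 0;
    }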

ggml.h

@@ -1,5 +1,6 @@
 #pragma once
+#include <refl-cpp/refl.hpp>
 //
 // GGML Tensor Library
 //
@@ -465,7 +466,7 @@ extern "C" {
     };
     // ggml object
-    struct ggml_object {
+    struct ggml_object : refl::attr::usage::type {
         size_t offs;
         size_t size;
@@ -479,7 +480,7 @@ extern "C" {
     static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
     // n-dimensional tensor
-    struct ggml_tensor {
+    struct ggml_tensor : refl::attr::usage::type {
         enum ggml_type type;
         enum ggml_backend_type backend;
@@ -524,7 +525,7 @@ extern "C" {
     // the compute plan that needs to be prepared for ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
-    struct ggml_cplan {
+    struct ggml_cplan : refl::attr::usage::type {
         size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
@@ -541,13 +542,13 @@ extern "C" {
         GGML_CGRAPH_EVAL_ORDER_COUNT
     };
-    struct ggml_hash_set {
+    struct ggml_hash_set : refl::attr::usage::type {
         size_t size;
         struct ggml_tensor ** keys;
     };
     // computation graph
-    struct ggml_cgraph {
+    struct ggml_cgraph : refl::attr::usage::type {
         int size;
         int n_nodes;
         int n_leafs;
@@ -567,7 +568,7 @@ extern "C" {
     };
     // scratch buffer
-    struct ggml_scratch {
+    struct ggml_scratch : refl::attr::usage::type {
         size_t offs;
         size_t size;
         void * data;
@@ -579,7 +580,7 @@ extern "C" {
         {}
     };
-    struct ggml_init_params {
+    struct ggml_init_params : refl::attr::usage::type {
         // memory pool
         size_t mem_size;   // bytes
         void * mem_buffer; // if NULL, memory will be allocated internally
@@ -597,7 +598,7 @@ extern "C" {
         GGML_TASK_FINALIZE,
     };
-    struct ggml_compute_params {
+    struct ggml_compute_params : refl::attr::usage::type {
         enum ggml_task_type type;
         // ith = thread index, nth = number of threads
@@ -1835,7 +1836,7 @@ extern "C" {
     //
     // see ggml.c (ggml_opt_default_params) for default values
     //
-    struct ggml_opt_params {
+    struct ggml_opt_params : refl::attr::usage::type {
         enum ggml_opt_type type;
         size_t graph_size;
@@ -1865,7 +1866,7 @@ extern "C" {
         int n_gradient_accumulation;
         // ADAM parameters
-        struct {
+        struct ggml_adam : refl::attr::usage::type {
            int n_iter;
            float sched; // schedule multiplier (fixed, decay or warmup)
@@ -1881,7 +1882,7 @@ extern "C" {
         } adam;
         // LBFGS parameters
-        struct {
+        struct ggml_lbfgs : refl::attr::usage::type {
            int m; // number of corrections to approximate the inv. Hessian
            int n_iter;
            int max_linesearch;
@@ -1896,7 +1897,7 @@ extern "C" {
         } lbfgs;
     };
-    struct ggml_opt_context {
+    struct ggml_opt_context : refl::attr::usage::type {
         struct ggml_context * ctx;
         struct ggml_opt_params params;
@@ -1908,7 +1909,7 @@ extern "C" {
         float loss_before;
         float loss_after;
-        struct {
+        struct ggml_grad : refl::attr::usage::type {
            struct ggml_tensor * g; // current gradient
            struct ggml_tensor * m; // first moment
            struct ggml_tensor * v; // second moment
@@ -1918,7 +1919,7 @@ extern "C" {
            int n_no_improvement;
         } adam;
-        struct {
+        struct ggml_params : refl::attr::usage::type {
            struct ggml_tensor * x;  // current parameters
            struct ggml_tensor * xp; // previous parameters
            struct ggml_tensor * g;  // current gradient
@@ -2011,7 +2012,7 @@ extern "C" {
     struct gguf_context;
-    struct gguf_init_params {
+    struct gguf_init_params : refl::attr::usage::type {
         bool no_alloc;
         // if not NULL, create a ggml_context and allocate the tensor data in it
@@ -2148,7 +2149,7 @@ extern "C" {
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
     typedef void (*ggml_vec_dot_t)   (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
-    typedef struct {
+    typedef struct ggml_something : refl::attr::usage::type {
         const char * type_name;
         int blck_size;
         size_t type_size;


@@ -1786,8 +1786,8 @@ struct llama_model_loader {
     llama_model_loader(const std::string & fname, bool use_mmap) : file(fname.c_str(), "rb") {
         struct gguf_init_params params = {
-            /*.no_alloc = */ true,
-            /*.ctx      = */ &ctx_meta,
+            .no_alloc = true,
+            .ctx      = &ctx_meta,
         };
         ctx_gguf = gguf_init_from_file(fname.c_str(), params);
@@ -2677,9 +2677,9 @@ static void llm_load_tensors(
     }
     struct ggml_init_params params = {
-        /*.mem_size   =*/ model.buf.size,
-        /*.mem_buffer =*/ model.buf.data,
-        /*.no_alloc   =*/ ml.use_mmap,
+        .mem_size   = model.buf.size,
+        .mem_buffer = model.buf.data,
+        .no_alloc   = ml.use_mmap,
     };
     model.ctx = ggml_init(params);
@@ -3843,9 +3843,9 @@ struct llm_build_context {
     void init() {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ buf_compute.size,
-            /*.mem_buffer =*/ buf_compute.data,
-            /*.no_alloc   =*/ true,
+            .mem_size   = buf_compute.size,
+            .mem_buffer = buf_compute.data,
+            .no_alloc   = true,
         };
         ctx0 = ggml_init(params);
@@ -8427,7 +8427,10 @@ void llama_backend_init(bool numa) {
     // needed to initialize f16 tables
     {
-        struct ggml_init_params params = { 0, NULL, false };
+        struct ggml_init_params params = { .mem_size   = 0,
+                                           .mem_buffer = NULL,
+                                           .no_alloc   = false
+        };
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
     }
@@ -8998,7 +9001,13 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
     if (kv_buf_size) {
         const size_t elt_size = ggml_element_size(kv_self.k);
-        ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
+        ggml_init_params ip = {
+            .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+            .mem_buffer = NULL,
+            .no_alloc   = /* no_alloc */ true
+        };
+        ggml_context * cpy_ctx = ggml_init(ip);
         ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
         ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
@@ -9126,7 +9135,12 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
         const size_t elt_size = ggml_element_size(kv_self.k);
-        ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
+        ggml_init_params ip {
+            .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+            .mem_buffer = NULL,
+            .no_alloc   = true };
+        ggml_context * cpy_ctx = ggml_init(ip);
         ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
         ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
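A note on the last two hunks: the named ip variable is one way to do it, but C++20 also accepts designated initializers directly in the braced call argument, which would keep each site a single statement. A sketch under that assumption, with the same fields and values as the hunks above:

    ggml_context * cpy_ctx = ggml_init({
        .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
        .mem_buffer = NULL,
        .no_alloc   = true,
    });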