bindings

2023-11-23 09:55:19 -05:00 · 2023-11-23 09:55:19 -05:00 · df647db611
commit df647db611
parent a08640c00d
17 changed files with 568 additions and 182 deletions
--- a/3
+++ b/3
@ -1,3 +1,6 @@
 tt:
 	clang++ -std=c++17 ggml.cpp
 # Define the default target now so that it is always the first target
 BUILD_TARGETS = \
 	main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@ -1527,11 +1527,14 @@ int main(int argc, char ** argv) {
    std::vector<uint8_t> work_buffer;
    for (int ex=0; ex<n_examples; ++ex) {
-        struct ggml_init_params params = {
+      struct ggml_init_params params(
-            .mem_size   = compute_size,
+				     //.mem_size   =
-            .mem_buffer = compute_addr,
+				     compute_size,
-            .no_alloc   = false,
+				     //.mem_buffer =
-        };
+				     compute_addr,
 				     //.no_alloc   =
 				     false
 				     );
        struct ggml_context * ctx0 = ggml_init(params);
@ -1602,11 +1605,14 @@ int main(int argc, char ** argv) {
        }
        printf("---\n");
        for (int i=0; i<n_gen; ++i) {
-            struct ggml_init_params params = {
+	  struct ggml_init_params params(
-                .mem_size   = compute_size,
+					 //.mem_size   =
-                .mem_buffer = compute_addr,
+					 compute_size,
-                .no_alloc   = false,
+					 //.mem_buffer =
-            };
+					 compute_addr,
 					 //.no_alloc   =
 					 false
 					 );
            struct ggml_context * ctx0 = ggml_init(params);
            ggml_cgraph gf = {};
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@ -140,11 +140,14 @@ int main(int argc, char ** argv)  {
    printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024));
-    struct ggml_init_params params = {
+    struct ggml_init_params params(
-        .mem_size   = ctx_size,
+				   //.mem_size   =
-        .mem_buffer = NULL,
+				   ctx_size,
-        .no_alloc   = 0
+				   //.mem_buffer =
-    };
+				   NULL,
 				   //.no_alloc   =
 				   0
 				   );
    ctx = ggml_init(params);
    if (!ctx) {
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@ -553,10 +553,12 @@ static void load_vocab(const char *filename, Config *config, struct llama_vocab
    if (is_ggml_file(filename)) {
        struct ggml_context * ctx_data = NULL;
-        struct gguf_init_params params = {
+        struct gguf_init_params params(
-	  .no_alloc =  false,
+				       //.no_alloc =
-	  .ctx      =  &ctx_data,
+				       false,
-        };
+				       //.ctx      =
 				       &ctx_data
 				       );
        struct gguf_context * ctx = gguf_init_from_file(filename, params);
        GGML_ASSERT(ctx != NULL);
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@ -389,9 +389,11 @@ static void export_lora(struct export_lora_params * params) {
    // open base model gguf, read tensors without their data
    struct ggml_context * ctx_in;
-    struct gguf_init_params params_gguf;
+    struct gguf_init_params params_gguf(
-    params_gguf.no_alloc = true;
+					//params_gguf.no_alloc =
-    params_gguf.ctx      = &ctx_in;
+					true,
 					//params_gguf.ctx      =
 					&ctx_in);
    struct gguf_context * gguf_in = gguf_init_from_file(params->fn_model_base.c_str(), params_gguf);
    // create new gguf
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@ -294,10 +294,12 @@ static void init_model(struct llama_model * input, struct my_llama_model * model
    // get parameters directly from gguf file
    {
-        struct gguf_init_params params = {
+      struct gguf_init_params params(
-	  .no_alloc = false,
+				     //.no_alloc =
-	  .ctx      = NULL,
+				     false,
-        };
+				     //.ctx      =
 				     NULL
 				     );
        struct gguf_context * mctx = gguf_init_from_file(fn_model, params);
        load_model_hparams_gguf(mctx, &hparams, "llama");
@ -991,9 +993,11 @@ static void save_checkpoint_lora_gguf(struct gguf_context * fctx, struct my_llam
 static bool load_checkpoint_lora_file(const char * filename, struct my_llama_model * model, struct my_llama_lora * lora, struct train_state * train) {
    struct ggml_context * f_ggml_ctx;
-    struct gguf_init_params params;
+    struct gguf_init_params params(
-    params.no_alloc = false;
+				   //params.no_alloc =
-    params.ctx = &f_ggml_ctx;
+				   false,
 				   //params.ctx =
 				   &f_ggml_ctx);
    struct gguf_context * fctx = gguf_init_from_file(filename, params);
    if (fctx == NULL) {
        return false;
@ -1708,11 +1712,14 @@ int main(int argc, char ** argv) {
    std::vector<uint8_t> mem_compute_data;
    // context for input tensors without their data
-    struct ggml_init_params ctx_input_params = {
+    struct ggml_init_params ctx_input_params(
-      .mem_size= ggml_tensor_overhead() * 2, // mem_size
+					     //.mem_size=
-      .mem_buffer=NULL,                       // mem_buffer
+					     ggml_tensor_overhead() * 2, // mem_size
-      .no_alloc=true,                       // no_alloc
+					     //.mem_buffer=
-    };
+					     NULL,                       // mem_buffer
 					     //.no_alloc=
 					     true                       // no_alloc
 					     );
    struct ggml_context * ctx_input = ggml_init(ctx_input_params);
    // the input tensors
@ -1737,11 +1744,14 @@ int main(int argc, char ** argv) {
            2*LLAMA_TRAIN_MAX_NODES*ggml_tensor_overhead() +
            (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
    );
-    struct ggml_init_params ctx_compute_params = {
+    struct ggml_init_params ctx_compute_params(
-      .mem_size=estimated_compute_size_wo_data, // mem_size
+					       //.mem_size=
-      .mem_buffer=NULL,                           // mem_buffer
+					       estimated_compute_size_wo_data, // mem_size
-      .no_alloc=true,                           // no_alloc
+					       //.mem_buffer=
-    };
+					       NULL,                           // mem_buffer
 					       //.no_alloc=
 					       true                           // no_alloc
 					       );
    struct ggml_context * ctx_compute = NULL;
    struct ggml_tensor * loss   = NULL;
@ -1904,11 +1914,14 @@ int main(int argc, char ** argv) {
    printf("%s: work_size = %zu bytes (%.1f MB)\n", __func__, max_work_size, (float) max_work_size / (1024.0f*1024.0f));
    // context for work buffer
-    struct ggml_init_params ctx_work_params = {
+    struct ggml_init_params ctx_work_params(
-      .mem_size= max_work_size, // mem_size
+					    //.mem_size=
-      .mem_buffer = NULL,          // mem_buffer
+					    max_work_size, // mem_size
-      .no_alloc  = false,         // no_alloc
+					    //.mem_buffer =
-    };
+					    NULL,          // mem_buffer
 					    //.no_alloc  =
 					    false         // no_alloc
 					    );
    struct ggml_context * ctx_work = ggml_init(ctx_work_params);
    int64_t t0 = ggml_time_ms();
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@ -255,11 +255,14 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima
    const auto & buf_compute = ctx->buf_compute;
-    struct ggml_init_params params = {
+    struct ggml_init_params params(
-      .mem_size = buf_compute.size,
+				   //.mem_size =
-      .mem_buffer = buf_compute.data,
+				   buf_compute.size,
-      .no_alloc = false,
+				   //.mem_buffer =
-    };
+				   buf_compute.data,
 				   //.no_alloc =
 				   false
 				   );
    params.no_alloc = true;
@ -455,10 +458,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
    struct ggml_context * meta = NULL;
-    struct gguf_init_params params = {
+    struct gguf_init_params params(
-      .no_alloc =  true,
+				   //.no_alloc =
-      .ctx      =  &meta,
+				   true,
-    };
+				   //.ctx      =
 				   &meta);
    struct gguf_context * ctx = gguf_init_from_file(fname, params);
    if (!ctx) {
@ -552,11 +557,14 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
    // load tensors
    {
-        struct ggml_init_params params = {
+      struct ggml_init_params params(
-	  .mem_size = ctx_size,
+				     //.mem_size =
-	  .mem_buffer = NULL,
+				     ctx_size,
-          .no_alloc = false,
+				     //.mem_buffer =
-        };
+				     NULL,
 				     //.no_alloc =
 				     false
 				     );
        new_clip->ctx = ggml_init(params);
        if (!new_clip->ctx) {
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@ -600,10 +600,12 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo
    // set vocab by copying from vocab_model gguf file
    {
-        struct gguf_init_params params = {
+      struct gguf_init_params params(
-	  .no_alloc =  false,
+	  //.no_alloc =
-	  .ctx      = NULL,
+	  false,
-        };
+	  //.ctx      =
 	  NULL
 				     );
        struct gguf_context * vctx = gguf_init_from_file(fn_vocab_model, params);
        const int token_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_LIST));
@ -745,9 +747,11 @@ static void save_checkpoint_gguf(struct gguf_context * fctx, const char * fn_voc
 static bool load_checkpoint_file(const char * filename, struct my_llama_model * model, struct train_state * train) {
    struct ggml_context * f_ggml_ctx;
-    struct gguf_init_params params;
+    struct gguf_init_params params(
-    params.no_alloc = false;
+				   //params.no_alloc =
-    params.ctx = &f_ggml_ctx;
+				   false,
 				   //params.ctx =
 				   &f_ggml_ctx);
    struct gguf_context * fctx = gguf_init_from_file(filename, params);
    if (fctx == NULL) {
        return false;
@ -1085,11 +1089,14 @@ int main(int argc, char ** argv) {
    ggml_allocr * alloc = NULL;
    // context for input tensors without their data
-    struct ggml_init_params ctx_input_params = {
+    struct ggml_init_params ctx_input_params (
-        .mem_size = ggml_tensor_overhead() * 2, // mem_size
+					      //.mem_size =
-        .mem_buffer = NULL,                       // mem_buffer
+					      ggml_tensor_overhead() * 2, // mem_size
-        .no_alloc = true,                       // no_alloc
+					      //       .mem_buffer =
-    };
+					      NULL,                       // mem_buffer
 					      //       .no_alloc =
 					      true                       // no_alloc
 					      );
    struct ggml_context * ctx_input = ggml_init(ctx_input_params);
    // the input tensors
@ -1114,11 +1121,14 @@ int main(int argc, char ** argv) {
            2*LLAMA_TRAIN_MAX_NODES*ggml_tensor_overhead() +
            (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
    );
-    struct ggml_init_params ctx_compute_params = {
+    struct ggml_init_params ctx_compute_params(
-      .mem_size = estimated_compute_size_wo_data, // mem_size
+					       //    .mem_size =
-      .mem_buffer= NULL,                           // mem_buffer
+					       estimated_compute_size_wo_data, // mem_size
-      .no_alloc = true,                           // no_alloc
+					       //.mem_buffer=
-    };
+					       NULL,                           // mem_buffer
 					       //.no_alloc =
 					       true                           // no_alloc
 					       );
    struct ggml_context * ctx_compute = NULL;
    struct ggml_tensor * loss   = NULL;
@ -1267,11 +1277,14 @@ int main(int argc, char ** argv) {
    printf("%s: work_size = %zu bytes (%.1f MB)\n", __func__, max_work_size, (float) max_work_size / (1024.0f*1024.0f));
    // context for work buffer
-    struct ggml_init_params ctx_work_params = {
+    struct ggml_init_params ctx_work_params(
-      .mem_size= max_work_size, // 
+					    //.mem_size=
-      .mem_buffer= NULL,          // 
+					    max_work_size, // 
-      .no_alloc=false,         // 
+					    //.mem_buffer=
-    };
+					    NULL,          // 
 					    //.no_alloc=
 					    false         // 
 					    );
    struct ggml_context * ctx_work = ggml_init(ctx_work_params);
    int64_t t0 = ggml_time_ms();
--- a/ggml-backend.cpp
+++ b/ggml-backend.cpp
@ -586,11 +586,14 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g
    memset(sched->node_copies,   0, sizeof(sched->node_copies[0])   * hash_size);
    sched->n_splits = 0;
-    struct ggml_init_params params = {
+    struct ggml_init_params params(
-        .mem_size =   sizeof(sched->context_buffer),
+				   //.mem_size =
-        .mem_buffer =  sched->context_buffer,
+				   sizeof(sched->context_buffer),
-        .no_alloc =    true
+				   //.mem_buffer =
-    };
+				   sched->context_buffer,
 				   //.no_alloc =
 				   true
 				   );
    if (sched->ctx != NULL) {
        ggml_free(sched->ctx);
--- a/ggml.cpp
+++ b/ggml.cpp
@ -16446,7 +16446,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
        /*.abort_callback          =*/ NULL,
        /*.abort_callback_data     =*/ NULL,
    };
-    struct ggml_compute_state * workers = alloca(sizeof(struct ggml_compute_state)*n_threads);
+    struct ggml_compute_state * workers = (struct ggml_compute_state *)alloca(sizeof(struct ggml_compute_state)*n_threads);
    // create thread pool
    if (n_threads > 1) {
@ -16775,11 +16775,11 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
            const size_t overhead = 1*ggml_tensor_overhead();
 	    GGML_ASSERT(0);
 	    // FIXME
-            struct ggml_init_params params;// = {
+            struct ggml_init_params params(
-            params.mem_size   = fsize + overhead,
+					   fsize + overhead,
-            params.mem_buffer = NULL,
+					   NULL,
-            params.no_alloc   = false,
+					   false);
-            // };
+
            *ctx_data = ggml_init(params);
@ -16831,10 +16831,10 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
        {
            const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead() + ggml_graph_overhead_custom(graph_size, false);
-            struct ggml_init_params params;// = {
+            struct ggml_init_params params(
-            params.mem_size   = size_eval + overhead,
+					   size_eval + overhead,
-            params.mem_buffer = NULL,
+NULL,
-            params.no_alloc   = true,
+					   true);
            *ctx_eval = ggml_init(params);
@ -17974,7 +17974,7 @@ GGML_API void ggml_opt_init(
    opt->nx = nx;
    opt->just_initialized = true;
    if (opt->ctx == NULL) {
-        struct ggml_init_params ctx_opt_params;
+      struct ggml_init_params ctx_opt_params;
        if (opt->params.type == GGML_OPT_ADAM) {
            ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3;
            if (opt->params.past > 0) {
@ -18690,10 +18690,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
            (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
 	// FIXME
-        struct ggml_init_params pdata;
+        struct ggml_init_params pdata(
-        pdata.mem_size   = mem_size,
+				      mem_size,
-        pdata.mem_buffer = NULL,
+				      NULL,
-        pdata.no_alloc   = params.no_alloc,
+				      params.no_alloc);
        *params.ctx = ggml_init(pdata);
--- a/ggml.h
+++ b/ggml.h
@ -286,7 +286,7 @@
    GGML_UNUSED(prefix##3);
 #ifdef  __cplusplus
-extern "C" {
+//extern "C" {
 #endif
 #if defined(__ARM_NEON) && defined(__CUDACC__)
@ -581,6 +581,18 @@ extern "C" {
    };
    struct ggml_init_params : refl::attr::usage::type{
      ggml_init_params(size_t mem_size,
 		       void * mem_buffer,
 		       bool   no_alloc):
 	mem_size( mem_size),
        mem_buffer(mem_buffer),
        no_alloc(no_alloc){}
      ggml_init_params():
 	mem_size(0),
        mem_buffer(0),
        no_alloc(0){}
        // memory pool
        size_t mem_size;   // bytes
        void * mem_buffer; // if NULL, memory will be allocated internally
@ -2013,6 +2025,8 @@ extern "C" {
    struct gguf_context;
    struct gguf_init_params : refl::attr::usage::type{
      gguf_init_params(bool no_alloc, struct ggml_context ** ctx): no_alloc(no_alloc),ctx(ctx){}
        bool no_alloc;
        // if not NULL, create a ggml_context and allocate the tensor data in it
@ -2164,5 +2178,5 @@ extern "C" {
    GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
 #ifdef  __cplusplus
-}
+//}
 #endif
--- a/llama.cpp
+++ b/llama.cpp
@ -1785,10 +1785,10 @@ struct llama_model_loader {
    struct ggml_context * ctx_meta = NULL;
    llama_model_loader(const std::string & fname, bool use_mmap) : file(fname.c_str(), "rb") {
-	struct gguf_init_params params = {
+      struct gguf_init_params params(
-	  .no_alloc =  true,
+				     /*.no_alloc =*/  true,
-	  .ctx      =  &ctx_meta,
+				     /*.ctx      = */ &ctx_meta
-	};
+				     );
 	ctx_gguf = gguf_init_from_file(fname.c_str(), params);
 	if (!ctx_gguf) {
@ -2676,11 +2676,12 @@ static void llm_load_tensors(
 	    model.mlock_buf.grow_to(model.buf.size);
 	}
-	struct ggml_init_params params = {
+	struct ggml_init_params params(
-	    .mem_size   = model.buf.size,
+				       model.buf.size,
-	    .mem_buffer = model.buf.data,
+				       model.buf.data,
-	    .no_alloc   = ml.use_mmap,
+				       
-	};
+
 				       ml.use_mmap				       );
 	model.ctx = ggml_init(params);
 	if (!model.ctx) {
@ -3842,11 +3843,14 @@ struct llm_build_context {
        }
    void init() {
-        struct ggml_init_params params = {
+      struct ggml_init_params params(
-	  .mem_size   = buf_compute.size,
+				     //.mem_size   =
-          .mem_buffer = buf_compute.data,
+				     buf_compute.size,
-          .no_alloc   = true,
+				     //.mem_buffer =
-        };
+				     buf_compute.data,
 				     //.no_alloc   =
 				     true
 				     );
        ctx0 = ggml_init(params);
    }
@ -8447,10 +8451,11 @@ void llama_backend_init(bool numa) {
    // needed to initialize f16 tables
    {
-        struct ggml_init_params params = { .mem_size = 0,
+      struct ggml_init_params params(
-					   .mem_buffer = NULL,
+				     0,
-					   .no_alloc = false
+				     NULL,
-	};
+				     false
 				     );
        struct ggml_context * ctx = ggml_init(params);
        ggml_free(ctx);
    }
@ -9021,11 +9026,14 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
        if (kv_buf_size) {
            const size_t elt_size = ggml_element_size(kv_self.k);
-	    ggml_init_params ip = {
+	    ggml_init_params ip(
-	      .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+				//.mem_size   =
-	      .mem_buffer =NULL,
+				6*ggml_tensor_overhead() + ggml_graph_overhead(),
-	      .no_alloc = /* no_alloc */ true
+				//.mem_buffer =
-	    };
+				NULL,
 				//.no_alloc = /* no_alloc */
 				true
 				);
            ggml_context * cpy_ctx = ggml_init( ip);
            ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
@ -9155,10 +9163,13 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
            const size_t elt_size = ggml_element_size(kv_self.k);
-	    ggml_init_params ip {
+	    ggml_init_params ip(
-	      .mem_size= 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+				//.mem_size=
-	      .mem_buffer=NULL,
+				6*ggml_tensor_overhead() + ggml_graph_overhead(),
-	      .no_alloc=true };
+				//.mem_buffer=
 				NULL,
 				//.no_alloc=
 				true );
            ggml_context * cpy_ctx = ggml_init(ip);
            ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
--- a/llama.h
+++ b/llama.h
@ -50,7 +50,7 @@
 #endif
 #ifdef __cplusplus
-extern "C" {
+//extern "C" {
 #endif
    //
@ -189,7 +189,7 @@ extern "C" {
        llama_seq_id all_seq_id; // used if seq_id == NULL
    } llama_batch;
-    struct llama_model_params : refl::attr::usage::type{
+    struct llama_model_params {
        int32_t n_gpu_layers; // number of layers to store in VRAM
        int32_t main_gpu;     // the GPU that is used for scratch and small tensors
        const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
@ -268,7 +268,7 @@ extern "C" {
        LLAMA_GRETYPE_CHAR_ALT       = 6,
    };
-    typedef struct llama_grammar_element : refl::attr::usage::type{
+    typedef struct llama_grammar_element : refl::attr::usage::type {
      llama_grammar_element(        enum llama_gretype type,
 				    uint32_t           value // Unicode code point or rule ID
 				    ):type(type), value(value){}
@ -811,7 +811,7 @@ extern "C" {
    LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx);
 #ifdef __cplusplus
-}
+//}
 #endif
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
@ -828,6 +828,6 @@ const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal
 #endif // LLAMA_API_INTERNAL
-template<typename T> void print_fields(const T& obj);
+
 #endif // LLAMA_H
--- a/print.hpp
+++ b/print.hpp
@ -4,65 +4,373 @@
 //#include <refl.hpp>
 #include "llama.h"
 REFL_TYPE(ggml_object)
 REFL_END
 REFL_TYPE(ggml_tensor)
 REFL_END
 REFL_TYPE(ggml_cplan )
 REFL_END
 REFL_TYPE(ggml_hash_set )
 REFL_END
 REFL_TYPE(ggml_cgraph )
 REFL_END
 REFL_TYPE(ggml_scratch )
 REFL_END
 REFL_TYPE(ggml_init_params )
 REFL_END
-REFL_TYPE(ggml_compute_params )
+
 REFL_END
 REFL_TYPE(ggml_opt_params )
 REFL_END
 REFL_TYPE(ggml_opt_params::ggml_adam)
 REFL_END
 REFL_TYPE(ggml_opt_params::ggml_lbfgs)
 REFL_END
-REFL_TYPE(ggml_opt_context )
+
-REFL_END
+
 REFL_TYPE(ggml_opt_context::ggml_grad )
 REFL_END
 REFL_TYPE(gpt_params )
 REFL_END
-REFL_TYPE(gguf_init_params )
+
-REFL_END
+
 REFL_TYPE(ggml_something )
 REFL_END
 REFL_TYPE(llama_sampling_context )
 REFL_END
 REFL_TYPE(llama_token_data )
 REFL_END
-REFL_TYPE(llama_model )
+
-REFL_END
+
 REFL_TYPE(llama_token_data_array )
 REFL_END
 REFL_TYPE(llama_batch )
 REFL_END
-REFL_TYPE(llama_model_params )
+
 REFL_TYPE(ggml_object)
  REFL_FIELD(offs)
 REFL_END
-REFL_TYPE(llama_context_params )
+
 REFL_TYPE(ggml_tensor)
  REFL_FIELD(type)
 REFL_END
-REFL_TYPE(llama_context )
+
 REFL_TYPE(ggml_cplan)
  REFL_FIELD(work_size)
 REFL_END
-REFL_TYPE(llama_model_quantize_params )
+
 REFL_TYPE(ggml_hash_set)
  REFL_FIELD(size)
 REFL_END
-REFL_TYPE(llama_grammar_element )
+
 REFL_TYPE(ggml_cgraph)
  REFL_FIELD(size)
 REFL_END
-REFL_TYPE(llama_timings )
+
 REFL_TYPE(ggml_scratch)
  REFL_FIELD(offs)
 REFL_END
-REFL_TYPE(llama_beam_view )
+
 REFL_TYPE(ggml_compute_params)
  REFL_FIELD(type)
 REFL_END
-REFL_TYPE(llama_beams_state )
+
 REFL_TYPE(ggml_opt_params)
  REFL_FIELD(type)
 REFL_END
 REFL_TYPE(ggml_opt_context)
  REFL_FIELD(ctx)
 REFL_END
 //REFL_TYPE(gguf_context)
 //REFL_END
 REFL_TYPE(gguf_init_params)
 REFL_END
 REFL_TYPE(ggml_something)
  REFL_FIELD(type_name)
 REFL_END
 //REFL_TYPE()
 //  REFL_FIELD(d)
 //REFL_TYPE()
 // incomplete ttype
 // REFL_TYPE(ggml_context)
 //   REFL_FIELD(mem_size)
 //   REFL_FIELD(mem_buffer)
 // REFL_END
 //REFL_TYPE(ggml_context_container)
 //  REFL_FIELD(used)
 //  REFL_FIELD(context)
 //REFL_END
 // REFL_TYPE(ggml_numa_node)
 //   REFL_FIELD(cpus)
 //   REFL_FIELD(n_cpus)
 // REFL_END
 // REFL_TYPE(ggml_numa_nodes)
 //   REFL_FIELD(nodes)
 //   REFL_FIELD(n_nodes)
 // REFL_END
 // REFL_TYPE(ggml_state)
 //   REFL_FIELD(contexts)
 //   REFL_FIELD(numa)
 //   REFL_END
 // REFL_TYPE(gguf_str)
 //   REFL_FIELD(n)
 //   REFL_FIELD(data)
 // REFL_END
 // REFL_TYPE(ggml_map_custom1_op_params)
 //   REFL_FIELD(fun)
 //   REFL_FIELD(n_tasks)
 // REFL_END
 // REFL_TYPE(ggml_map_custom2_op_params)
 //   REFL_FIELD(fun)
 //   REFL_FIELD(n_tasks)
 // REFL_END
 // REFL_TYPE(ggml_map_custom3_op_params)
 //   REFL_FIELD(fun)
 //   REFL_FIELD(n_tasks)
 // REFL_END
 // REFL_TYPE(hash_map)
 //   REFL_FIELD(set)
 //   REFL_FIELD(vals)
 // REFL_END
 // REFL_TYPE(ggml_compute_state_shared)
 //   REFL_FIELD(cgraph)
 //   REFL_FIELD(cplan)
 // REFL_END
 // REFL_TYPE(ggml_compute_state)
 //   REFL_FIELD(thrd)
 //   REFL_FIELD(ith)
 // REFL_END
 // REFL_TYPE(ggml_lbfgs_iteration_data)
 //   REFL_FIELD(alpha)
 //   REFL_FIELD(ys)
 // REFL_END
 //REFL_TYPE()
 //  REFL_FIELD(type)
 //REFL_END
 // REFL_TYPE(gguf_kv)
 //   REFL_FIELD(key)
 //   REFL_FIELD(type)
 // REFL_END
 // REFL_TYPE(gguf_header)
 //   REFL_FIELD(magic)
 //   REFL_FIELD(version)
 // REFL_END
 // REFL_TYPE(gguf_tensor_info)
 //   REFL_FIELD(name)
 //   REFL_FIELD(n_dims)
 // REFL_END
 REFL_TYPE(gguf_context)
 //  REFL_FIELD(header)
 //  REFL_FIELD(kv)
 REFL_END
 // REFL_TYPE(gguf_buf)
 //   REFL_FIELD(data)
 //   REFL_FIELD(size)
 // REFL_END
 //REFL_TYPE(llama_token_data)
 //REFL_END
 REFL_TYPE(llama_model_params)
  REFL_FIELD(n_gpu_layers)
 REFL_END
 REFL_TYPE(llama_context_params)
  REFL_FIELD(seed)
 REFL_END
 REFL_TYPE(llama_model_quantize_params)
  REFL_FIELD(nthread)
 REFL_END
 REFL_TYPE(llama_grammar_element)
 REFL_END
 REFL_TYPE(llama_timings)
  REFL_FIELD(t_start_ms)
 REFL_END
 REFL_TYPE(llama_beam_view)
  REFL_FIELD(tokens)
 REFL_END
 REFL_TYPE(llama_beams_state)
  REFL_FIELD(beam_views)
 REFL_END
 //REFL_TYPE(ggml_backend)
 //REFL_END
 REFL_TYPE(ggml_backend_buffer)
 REFL_END
 //REFL_TYPE(ggml_allocr)
 //REFL_END
 //REFL_TYPE(ggml_tallocr)
 //REFL_END
 //REFL_TYPE(ggml_gallocr)
 //REFL_END
 //REFL_TYPE(llama_buffer)
 //REFL_FIELD(data)
 //REFL_FIELD(size)
 //REFL_END
 // REFL_TYPE(llama_file)
 // REFL_FIELD(fp)
 // REFL_FIELD(size)
 // REFL_END
 // REFL_TYPE(llama_mmap)
 // REFL_FIELD(addr)
 // REFL_FIELD(size)
 // REFL_END
 // REFL_TYPE(llama_mlock)
 //   REFL_FIELD(addr)
 //   REFL_FIELD(size)
 // REFL_END
 //REFL_TYPE(llama_state)
 //  REFL_FIELD(log_callback)
 //  REFL_FIELD(log_callback_user_data)
 //  REFL_END
 // REFL_TYPE(llama_hparams)
 //   REFL_FIELD(vocab_only)
 //   REFL_FIELD(n_vocab)
 //   REFL_END
 //REFL_TYPE(llama_cparams)
 //  REFL_FIELD(n_ctx)
 //  REFL_FIELD(n_batch)
 //REFL_END
 //REFL_TYPE(llama_layer)
 //  REFL_FIELD(attn_norm)
 //  REFL_FIELD(attn_norm_b)
 //REFL_END
 // REFL_TYPE(llama_kv_cell)
 //   REFL_FIELD(pos)
 //   REFL_FIELD(delta)
 // REFL_END
 // REFL_TYPE(llama_kv_cache)
 //   REFL_FIELD(has_shift)
 //   REFL_FIELD(head)
 // REFL_END
 // REFL_TYPE(llama_vocab)
 // REFL_END
 REFL_TYPE(llama_model)
 //  REFL_FIELD(type)
 //  REFL_FIELD(arch)
 REFL_END
 REFL_TYPE(llama_context)
 REFL_END
 // REFL_TYPE(llama_model_loader)
 //   REFL_FIELD(n_kv)
 //   REFL_FIELD(n_tensors)
 // REFL_END
 // REFL_TYPE(llm_build_context)
 //   REFL_FIELD(model)
 //   REFL_FIELD(hparams)
 // REFL_END
 // REFL_TYPE(llm_offload_trie)
 // REFL_END
 // REFL_TYPE(llm_symbol)
 //   REFL_FIELD(prev)
 // REFL_END
 // REFL_TYPE(llm_bigram_spm)
 // REFL_END
 // REFL_TYPE(llm_tokenizer_spm)
 // REFL_END
 // REFL_TYPE(llm_bigram_bpe)
 // REFL_END
 // REFL_TYPE(llm_tokenizer_bpe)
 // REFL_END
 // REFL_TYPE(fragment_buffer_variant)
 // REFL_END
 // REFL_TYPE(llama_partial_utf8)
 //   REFL_FIELD(value)
 //   REFL_FIELD(n_remain)
 // REFL_END
 REFL_TYPE(llama_grammar)
 //  REFL_FIELD(rules)
 //  REFL_FIELD(stacks)
 REFL_END
 //REFL_TYPE(llama_grammar_candidate)
 //  REFL_FIELD(index)
 //  REFL_FIELD(code_points)
 //REFL_END
 // REFL_TYPE(llama_beam)
 //   REFL_FIELD(tokens)
 //   REFL_FIELD(p)
 // REFL_END
 // REFL_TYPE(llama_logit_info)
 //   REFL_FIELD(logits)
 //   REFL_FIELD(n_vocab)
 // REFL_END
 // REFL_TYPE(llama_beam_search_data)
 //   REFL_FIELD(ctx)
 //   REFL_FIELD(n_beams)
 // REFL_END
 // REFL_TYPE(quantize_state_internal)
 //   REFL_FIELD(model)
 //   REFL_FIELD(params)
 // REFL_END
 // REFL_TYPE(llama_data_context)
 // REFL_END
 // REFL_TYPE(llama_data_buffer_context)
 //   REFL_FIELD(ptr)
 // REFL_END
 // REFL_TYPE(llama_data_file_context)
 //   REFL_FIELD(file)
 // REFL_END
 // // A simple struct with some fields and a function
 // // A custom attribute to mark some fields as hidden
 struct hidden : refl::attr::usage::field {};
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@ -115,11 +115,11 @@ int main(int argc, char * argv[]) {
    generate_data(1.0, test_data2.size(), test_data2.data());
    // Initialize GGML, ensures float conversion tables are initialized
-    struct ggml_init_params ggml_params = {
+    struct ggml_init_params ggml_params(
        /* .mem_size   = */ 1*1024,
        /* .mem_buffer = */ NULL,
-        /* .no_alloc   = */ true,
+        /* .no_alloc   = */ true
-    };
+					);
    struct ggml_context * ctx = ggml_init(ggml_params);
    int num_failed = 0;
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@ -261,11 +261,11 @@ int main(int argc, char * argv[]) {
    // Initialize GGML, ensures float conversion tables are initialized
-    struct ggml_init_params ggml_params = {
+    struct ggml_init_params ggml_params(
        /* .mem_size   = */ 1*1024,
        /* .mem_buffer = */ NULL,
-        /* .no_alloc   = */ true,
+        /* .no_alloc   = */ true
-    };
+					);
    struct ggml_context * ctx = ggml_init(ggml_params);
    for (int i = 0; i < GGML_TYPE_COUNT; i++) {
--- a/tests/test-rope.cpp
+++ b/tests/test-rope.cpp
@ -124,11 +124,11 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
 }
 int main(int /*argc*/, const char ** /*argv*/) {
-    struct ggml_init_params params = {
+  struct ggml_init_params params(
        /* .mem_size   = */ 128*1024*1024,
        /* .mem_buffer = */ NULL,
-        /* .no_alloc   = */ false,
+        /* .no_alloc   = */ false
-    };
+				 );
    std::vector<uint8_t> work_buffer;