From df647db61179043798867dd214a927ffb5ffffc0 Mon Sep 17 00:00:00 2001 From: mike dupont Date: Thu, 23 Nov 2023 09:55:19 -0500 Subject: [PATCH] bindings --- Makefile | 3 + examples/baby-llama/baby-llama.cpp | 26 +- examples/benchmark/benchmark-matmult.cpp | 13 +- .../convert-llama2c-to-ggml.cpp | 10 +- examples/export-lora/export-lora.cpp | 8 +- examples/finetune/finetune.cpp | 57 ++- examples/llava/clip.cpp | 36 +- .../train-text-from-scratch.cpp | 57 ++- ggml-backend.cpp | 13 +- ggml.cpp | 30 +- ggml.h | 18 +- llama.cpp | 65 +-- llama.h | 10 +- print.hpp | 386 ++++++++++++++++-- tests/test-quantize-fns.cpp | 6 +- tests/test-quantize-perf.cpp | 6 +- tests/test-rope.cpp | 6 +- 17 files changed, 568 insertions(+), 182 deletions(-) diff --git a/Makefile b/Makefile index 6b941d622..93c23772c 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +tt: + clang++ -std=c++17 ggml.cpp + # Define the default target now so that it is always the first target BUILD_TARGETS = \ main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \ diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp index 697e003e3..b2679a9d9 100644 --- a/examples/baby-llama/baby-llama.cpp +++ b/examples/baby-llama/baby-llama.cpp @@ -1527,11 +1527,14 @@ int main(int argc, char ** argv) { std::vector work_buffer; for (int ex=0; exfn_model_base.c_str(), params_gguf); // create new gguf diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index cda68b76d..3b5fefda8 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -294,10 +294,12 @@ static void init_model(struct llama_model * input, struct my_llama_model * model // get parameters directly from gguf file { - struct gguf_init_params params = { - .no_alloc = false, - .ctx = NULL, - }; + struct gguf_init_params params( + //.no_alloc = + false, + //.ctx = + NULL + ); struct gguf_context * mctx = gguf_init_from_file(fn_model, params); load_model_hparams_gguf(mctx, &hparams, "llama"); @@ -991,9 +993,11 @@ static void save_checkpoint_lora_gguf(struct gguf_context * fctx, struct my_llam static bool load_checkpoint_lora_file(const char * filename, struct my_llama_model * model, struct my_llama_lora * lora, struct train_state * train) { struct ggml_context * f_ggml_ctx; - struct gguf_init_params params; - params.no_alloc = false; - params.ctx = &f_ggml_ctx; + struct gguf_init_params params( + //params.no_alloc = + false, + //params.ctx = + &f_ggml_ctx); struct gguf_context * fctx = gguf_init_from_file(filename, params); if (fctx == NULL) { return false; @@ -1708,11 +1712,14 @@ int main(int argc, char ** argv) { std::vector mem_compute_data; // context for input tensors without their data - struct ggml_init_params ctx_input_params = { - .mem_size= ggml_tensor_overhead() * 2, // mem_size - .mem_buffer=NULL, // mem_buffer - .no_alloc=true, // no_alloc - }; + struct ggml_init_params ctx_input_params( + //.mem_size= + ggml_tensor_overhead() * 2, // mem_size + //.mem_buffer= + NULL, // mem_buffer + //.no_alloc= + true // no_alloc + ); struct ggml_context * ctx_input = ggml_init(ctx_input_params); // the input tensors @@ -1737,11 +1744,14 @@ int main(int argc, char ** argv) { 2*LLAMA_TRAIN_MAX_NODES*ggml_tensor_overhead() + (params.common.use_checkpointing ? 
3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true)) ); - struct ggml_init_params ctx_compute_params = { - .mem_size=estimated_compute_size_wo_data, // mem_size - .mem_buffer=NULL, // mem_buffer - .no_alloc=true, // no_alloc - }; + struct ggml_init_params ctx_compute_params( + //.mem_size= + estimated_compute_size_wo_data, // mem_size + //.mem_buffer= + NULL, // mem_buffer + //.no_alloc= + true // no_alloc + ); struct ggml_context * ctx_compute = NULL; struct ggml_tensor * loss = NULL; @@ -1904,11 +1914,14 @@ int main(int argc, char ** argv) { printf("%s: work_size = %zu bytes (%.1f MB)\n", __func__, max_work_size, (float) max_work_size / (1024.0f*1024.0f)); // context for work buffer - struct ggml_init_params ctx_work_params = { - .mem_size= max_work_size, // mem_size - .mem_buffer = NULL, // mem_buffer - .no_alloc = false, // no_alloc - }; + struct ggml_init_params ctx_work_params( + //.mem_size= + max_work_size, // mem_size + //.mem_buffer = + NULL, // mem_buffer + //.no_alloc = + false // no_alloc + ); struct ggml_context * ctx_work = ggml_init(ctx_work_params); int64_t t0 = ggml_time_ms(); diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index bfd0d6f98..684724ef9 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -255,11 +255,14 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima const auto & buf_compute = ctx->buf_compute; - struct ggml_init_params params = { - .mem_size = buf_compute.size, - .mem_buffer = buf_compute.data, - .no_alloc = false, - }; + struct ggml_init_params params( + //.mem_size = + buf_compute.size, + //.mem_buffer = + buf_compute.data, + //.no_alloc = + false + ); params.no_alloc = true; @@ -455,10 +458,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { struct ggml_context * meta = NULL; - struct gguf_init_params params = { - .no_alloc = true, - .ctx = &meta, - }; + struct gguf_init_params params( + //.no_alloc = + true, + //.ctx = + &meta); + struct gguf_context * ctx = gguf_init_from_file(fname, params); if (!ctx) { @@ -552,11 +557,14 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { // load tensors { - struct ggml_init_params params = { - .mem_size = ctx_size, - .mem_buffer = NULL, - .no_alloc = false, - }; + struct ggml_init_params params( + //.mem_size = + ctx_size, + //.mem_buffer = + NULL, + //.no_alloc = + false + ); new_clip->ctx = ggml_init(params); if (!new_clip->ctx) { diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index f7f3792f2..04e7546f7 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -600,10 +600,12 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo // set vocab by copying from vocab_model gguf file { - struct gguf_init_params params = { - .no_alloc = false, - .ctx = NULL, - }; + struct gguf_init_params params( + //.no_alloc = + false, + //.ctx = + NULL + ); struct gguf_context * vctx = gguf_init_from_file(fn_vocab_model, params); const int token_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_LIST)); @@ -745,9 +747,11 @@ static void save_checkpoint_gguf(struct gguf_context * fctx, const char * fn_voc static bool load_checkpoint_file(const char * filename, struct my_llama_model * model, struct train_state * train) { struct ggml_context * f_ggml_ctx; - struct 
gguf_init_params params; - params.no_alloc = false; - params.ctx = &f_ggml_ctx; + struct gguf_init_params params( + //params.no_alloc = + false, + //params.ctx = + &f_ggml_ctx); struct gguf_context * fctx = gguf_init_from_file(filename, params); if (fctx == NULL) { return false; @@ -1085,11 +1089,14 @@ int main(int argc, char ** argv) { ggml_allocr * alloc = NULL; // context for input tensors without their data - struct ggml_init_params ctx_input_params = { - .mem_size = ggml_tensor_overhead() * 2, // mem_size - .mem_buffer = NULL, // mem_buffer - .no_alloc = true, // no_alloc - }; + struct ggml_init_params ctx_input_params ( + //.mem_size = + ggml_tensor_overhead() * 2, // mem_size + // .mem_buffer = + NULL, // mem_buffer + // .no_alloc = + true // no_alloc + ); struct ggml_context * ctx_input = ggml_init(ctx_input_params); // the input tensors @@ -1114,11 +1121,14 @@ int main(int argc, char ** argv) { 2*LLAMA_TRAIN_MAX_NODES*ggml_tensor_overhead() + (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true)) ); - struct ggml_init_params ctx_compute_params = { - .mem_size = estimated_compute_size_wo_data, // mem_size - .mem_buffer= NULL, // mem_buffer - .no_alloc = true, // no_alloc - }; + struct ggml_init_params ctx_compute_params( + // .mem_size = + estimated_compute_size_wo_data, // mem_size + //.mem_buffer= + NULL, // mem_buffer + //.no_alloc = + true // no_alloc + ); struct ggml_context * ctx_compute = NULL; struct ggml_tensor * loss = NULL; @@ -1267,11 +1277,14 @@ int main(int argc, char ** argv) { printf("%s: work_size = %zu bytes (%.1f MB)\n", __func__, max_work_size, (float) max_work_size / (1024.0f*1024.0f)); // context for work buffer - struct ggml_init_params ctx_work_params = { - .mem_size= max_work_size, // - .mem_buffer= NULL, // - .no_alloc=false, // - }; + struct ggml_init_params ctx_work_params( + //.mem_size= + max_work_size, // + //.mem_buffer= + NULL, // + //.no_alloc= + false // + ); struct ggml_context * ctx_work = ggml_init(ctx_work_params); int64_t t0 = ggml_time_ms(); diff --git a/ggml-backend.cpp b/ggml-backend.cpp index 2a43b1277..f258f69e3 100644 --- a/ggml-backend.cpp +++ b/ggml-backend.cpp @@ -586,11 +586,14 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g memset(sched->node_copies, 0, sizeof(sched->node_copies[0]) * hash_size); sched->n_splits = 0; - struct ggml_init_params params = { - .mem_size = sizeof(sched->context_buffer), - .mem_buffer = sched->context_buffer, - .no_alloc = true - }; + struct ggml_init_params params( + //.mem_size = + sizeof(sched->context_buffer), + //.mem_buffer = + sched->context_buffer, + //.no_alloc = + true + ); if (sched->ctx != NULL) { ggml_free(sched->ctx); diff --git a/ggml.cpp b/ggml.cpp index c0ea3704e..27f078ca1 100644 --- a/ggml.cpp +++ b/ggml.cpp @@ -16446,7 +16446,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { /*.abort_callback =*/ NULL, /*.abort_callback_data =*/ NULL, }; - struct ggml_compute_state * workers = alloca(sizeof(struct ggml_compute_state)*n_threads); + struct ggml_compute_state * workers = (struct ggml_compute_state *)alloca(sizeof(struct ggml_compute_state)*n_threads); // create thread pool if (n_threads > 1) { @@ -16775,11 +16775,11 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context * const size_t overhead = 1*ggml_tensor_overhead(); GGML_ASSERT(0); // FIXME - struct ggml_init_params params;// = { - params.mem_size = fsize + overhead, - 
params.mem_buffer = NULL, - params.no_alloc = false, - // }; + struct ggml_init_params params( + fsize + overhead, + NULL, + false); + *ctx_data = ggml_init(params); @@ -16831,10 +16831,10 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context * { const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead() + ggml_graph_overhead_custom(graph_size, false); - struct ggml_init_params params;// = { - params.mem_size = size_eval + overhead, - params.mem_buffer = NULL, - params.no_alloc = true, + struct ggml_init_params params( + size_eval + overhead, +NULL, + true); *ctx_eval = ggml_init(params); @@ -17974,7 +17974,7 @@ GGML_API void ggml_opt_init( opt->nx = nx; opt->just_initialized = true; if (opt->ctx == NULL) { - struct ggml_init_params ctx_opt_params; + struct ggml_init_params ctx_opt_params; if (opt->params.type == GGML_OPT_ADAM) { ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3; if (opt->params.past > 0) { @@ -18690,10 +18690,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size; // FIXME - struct ggml_init_params pdata; - pdata.mem_size = mem_size, - pdata.mem_buffer = NULL, - pdata.no_alloc = params.no_alloc, + struct ggml_init_params pdata( + mem_size, + NULL, + params.no_alloc); *params.ctx = ggml_init(pdata); diff --git a/ggml.h b/ggml.h index 9f5f20da9..66a6b65bc 100644 --- a/ggml.h +++ b/ggml.h @@ -286,7 +286,7 @@ GGML_UNUSED(prefix##3); #ifdef __cplusplus -extern "C" { +//extern "C" { #endif #if defined(__ARM_NEON) && defined(__CUDACC__) @@ -581,6 +581,18 @@ extern "C" { }; struct ggml_init_params : refl::attr::usage::type{ + + ggml_init_params(size_t mem_size, + void * mem_buffer, + bool no_alloc): + mem_size( mem_size), + mem_buffer(mem_buffer), + no_alloc(no_alloc){} + ggml_init_params(): + mem_size(0), + mem_buffer(0), + no_alloc(0){} + // memory pool size_t mem_size; // bytes void * mem_buffer; // if NULL, memory will be allocated internally @@ -2013,6 +2025,8 @@ extern "C" { struct gguf_context; struct gguf_init_params : refl::attr::usage::type{ + gguf_init_params(bool no_alloc, struct ggml_context ** ctx): no_alloc(no_alloc),ctx(ctx){} + bool no_alloc; // if not NULL, create a ggml_context and allocate the tensor data in it @@ -2164,5 +2178,5 @@ extern "C" { GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type); #ifdef __cplusplus -} +//} #endif diff --git a/llama.cpp b/llama.cpp index 9bf27d69e..2d0d2b30f 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1785,10 +1785,10 @@ struct llama_model_loader { struct ggml_context * ctx_meta = NULL; llama_model_loader(const std::string & fname, bool use_mmap) : file(fname.c_str(), "rb") { - struct gguf_init_params params = { - .no_alloc = true, - .ctx = &ctx_meta, - }; + struct gguf_init_params params( + /*.no_alloc =*/ true, + /*.ctx = */ &ctx_meta + ); ctx_gguf = gguf_init_from_file(fname.c_str(), params); if (!ctx_gguf) { @@ -2676,11 +2676,12 @@ static void llm_load_tensors( model.mlock_buf.grow_to(model.buf.size); } - struct ggml_init_params params = { - .mem_size = model.buf.size, - .mem_buffer = model.buf.data, - .no_alloc = ml.use_mmap, - }; + struct ggml_init_params params( + model.buf.size, + model.buf.data, + + + ml.use_mmap ); model.ctx = ggml_init(params); if (!model.ctx) { @@ -3842,11 +3843,14 @@ struct llm_build_context { } void init() { - struct ggml_init_params params = { - .mem_size = buf_compute.size, - 
.mem_buffer = buf_compute.data, - .no_alloc = true, - }; + struct ggml_init_params params( + //.mem_size = + buf_compute.size, + //.mem_buffer = + buf_compute.data, + //.no_alloc = + true + ); ctx0 = ggml_init(params); } @@ -8447,10 +8451,11 @@ void llama_backend_init(bool numa) { // needed to initialize f16 tables { - struct ggml_init_params params = { .mem_size = 0, - .mem_buffer = NULL, - .no_alloc = false - }; + struct ggml_init_params params( + 0, + NULL, + false + ); struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } @@ -9021,11 +9026,14 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat if (kv_buf_size) { const size_t elt_size = ggml_element_size(kv_self.k); - ggml_init_params ip = { - .mem_size = 6*ggml_tensor_overhead() + ggml_graph_overhead(), - .mem_buffer =NULL, - .no_alloc = /* no_alloc */ true - }; + ggml_init_params ip( + //.mem_size = + 6*ggml_tensor_overhead() + ggml_graph_overhead(), + //.mem_buffer = + NULL, + //.no_alloc = /* no_alloc */ + true + ); ggml_context * cpy_ctx = ggml_init( ip); ggml_cgraph * gf = ggml_new_graph(cpy_ctx); @@ -9155,10 +9163,13 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { const size_t elt_size = ggml_element_size(kv_self.k); - ggml_init_params ip { - .mem_size= 6*ggml_tensor_overhead() + ggml_graph_overhead(), - .mem_buffer=NULL, - .no_alloc=true }; + ggml_init_params ip( + //.mem_size= + 6*ggml_tensor_overhead() + ggml_graph_overhead(), + //.mem_buffer= + NULL, + //.no_alloc= + true ); ggml_context * cpy_ctx = ggml_init(ip); ggml_cgraph * gf = ggml_new_graph(cpy_ctx); diff --git a/llama.h b/llama.h index b016b48f8..cbde7990b 100644 --- a/llama.h +++ b/llama.h @@ -50,7 +50,7 @@ #endif #ifdef __cplusplus -extern "C" { +//extern "C" { #endif // @@ -189,7 +189,7 @@ extern "C" { llama_seq_id all_seq_id; // used if seq_id == NULL } llama_batch; - struct llama_model_params : refl::attr::usage::type{ + struct llama_model_params { int32_t n_gpu_layers; // number of layers to store in VRAM int32_t main_gpu; // the GPU that is used for scratch and small tensors const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES) @@ -268,7 +268,7 @@ extern "C" { LLAMA_GRETYPE_CHAR_ALT = 6, }; - typedef struct llama_grammar_element : refl::attr::usage::type{ + typedef struct llama_grammar_element : refl::attr::usage::type { llama_grammar_element( enum llama_gretype type, uint32_t value // Unicode code point or rule ID ):type(type), value(value){} @@ -811,7 +811,7 @@ extern "C" { LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx); #ifdef __cplusplus -} +//} #endif // Internal API to be implemented by llama.cpp and used by tests/benchmarks only @@ -828,6 +828,6 @@ const std::vector> & llama_internal #endif // LLAMA_API_INTERNAL -template void print_fields(const T& obj); + #endif // LLAMA_H diff --git a/print.hpp b/print.hpp index d0679eca1..9c17550c0 100644 --- a/print.hpp +++ b/print.hpp @@ -4,65 +4,373 @@ //#include #include "llama.h" -REFL_TYPE(ggml_object) -REFL_END -REFL_TYPE(ggml_tensor) -REFL_END -REFL_TYPE(ggml_cplan ) -REFL_END -REFL_TYPE(ggml_hash_set ) -REFL_END -REFL_TYPE(ggml_cgraph ) -REFL_END -REFL_TYPE(ggml_scratch ) -REFL_END REFL_TYPE(ggml_init_params ) REFL_END -REFL_TYPE(ggml_compute_params ) -REFL_END -REFL_TYPE(ggml_opt_params ) -REFL_END + REFL_TYPE(ggml_opt_params::ggml_adam) REFL_END + REFL_TYPE(ggml_opt_params::ggml_lbfgs) REFL_END -REFL_TYPE(ggml_opt_context ) -REFL_END + + 
REFL_TYPE(ggml_opt_context::ggml_grad ) REFL_END + REFL_TYPE(gpt_params ) REFL_END -REFL_TYPE(gguf_init_params ) -REFL_END -REFL_TYPE(ggml_something ) -REFL_END + + REFL_TYPE(llama_sampling_context ) REFL_END + REFL_TYPE(llama_token_data ) REFL_END -REFL_TYPE(llama_model ) -REFL_END + + REFL_TYPE(llama_token_data_array ) REFL_END + REFL_TYPE(llama_batch ) REFL_END -REFL_TYPE(llama_model_params ) -REFL_END -REFL_TYPE(llama_context_params ) -REFL_END -REFL_TYPE(llama_context ) -REFL_END -REFL_TYPE(llama_model_quantize_params ) -REFL_END -REFL_TYPE(llama_grammar_element ) -REFL_END -REFL_TYPE(llama_timings ) -REFL_END -REFL_TYPE(llama_beam_view ) -REFL_END -REFL_TYPE(llama_beams_state ) + + +REFL_TYPE(ggml_object) + REFL_FIELD(offs) REFL_END +REFL_TYPE(ggml_tensor) + REFL_FIELD(type) +REFL_END + +REFL_TYPE(ggml_cplan) + REFL_FIELD(work_size) +REFL_END + +REFL_TYPE(ggml_hash_set) + REFL_FIELD(size) +REFL_END + +REFL_TYPE(ggml_cgraph) + REFL_FIELD(size) +REFL_END + +REFL_TYPE(ggml_scratch) + REFL_FIELD(offs) +REFL_END + +REFL_TYPE(ggml_compute_params) + REFL_FIELD(type) +REFL_END + +REFL_TYPE(ggml_opt_params) + REFL_FIELD(type) +REFL_END + +REFL_TYPE(ggml_opt_context) + REFL_FIELD(ctx) +REFL_END + +//REFL_TYPE(gguf_context) +//REFL_END + +REFL_TYPE(gguf_init_params) +REFL_END + +REFL_TYPE(ggml_something) + REFL_FIELD(type_name) +REFL_END + + +//REFL_TYPE() +// REFL_FIELD(d) +//REFL_TYPE() + +// incomplete ttype +// REFL_TYPE(ggml_context) +// REFL_FIELD(mem_size) +// REFL_FIELD(mem_buffer) +// REFL_END + +//REFL_TYPE(ggml_context_container) +// REFL_FIELD(used) +// REFL_FIELD(context) +//REFL_END + +// REFL_TYPE(ggml_numa_node) +// REFL_FIELD(cpus) +// REFL_FIELD(n_cpus) +// REFL_END + +// REFL_TYPE(ggml_numa_nodes) +// REFL_FIELD(nodes) +// REFL_FIELD(n_nodes) +// REFL_END + +// REFL_TYPE(ggml_state) +// REFL_FIELD(contexts) +// REFL_FIELD(numa) +// REFL_END + +// REFL_TYPE(gguf_str) +// REFL_FIELD(n) +// REFL_FIELD(data) +// REFL_END + +// REFL_TYPE(ggml_map_custom1_op_params) +// REFL_FIELD(fun) +// REFL_FIELD(n_tasks) +// REFL_END + +// REFL_TYPE(ggml_map_custom2_op_params) +// REFL_FIELD(fun) +// REFL_FIELD(n_tasks) +// REFL_END + +// REFL_TYPE(ggml_map_custom3_op_params) +// REFL_FIELD(fun) +// REFL_FIELD(n_tasks) +// REFL_END + +// REFL_TYPE(hash_map) +// REFL_FIELD(set) +// REFL_FIELD(vals) +// REFL_END +// REFL_TYPE(ggml_compute_state_shared) +// REFL_FIELD(cgraph) +// REFL_FIELD(cplan) +// REFL_END +// REFL_TYPE(ggml_compute_state) +// REFL_FIELD(thrd) +// REFL_FIELD(ith) +// REFL_END +// REFL_TYPE(ggml_lbfgs_iteration_data) +// REFL_FIELD(alpha) +// REFL_FIELD(ys) +// REFL_END +//REFL_TYPE() +// REFL_FIELD(type) +//REFL_END +// REFL_TYPE(gguf_kv) +// REFL_FIELD(key) +// REFL_FIELD(type) +// REFL_END + +// REFL_TYPE(gguf_header) +// REFL_FIELD(magic) +// REFL_FIELD(version) +// REFL_END + +// REFL_TYPE(gguf_tensor_info) +// REFL_FIELD(name) +// REFL_FIELD(n_dims) +// REFL_END + +REFL_TYPE(gguf_context) +// REFL_FIELD(header) +// REFL_FIELD(kv) +REFL_END + +// REFL_TYPE(gguf_buf) +// REFL_FIELD(data) +// REFL_FIELD(size) +// REFL_END + +//REFL_TYPE(llama_token_data) +//REFL_END + + +REFL_TYPE(llama_model_params) + REFL_FIELD(n_gpu_layers) +REFL_END +REFL_TYPE(llama_context_params) + REFL_FIELD(seed) +REFL_END +REFL_TYPE(llama_model_quantize_params) + REFL_FIELD(nthread) +REFL_END + +REFL_TYPE(llama_grammar_element) +REFL_END + +REFL_TYPE(llama_timings) + REFL_FIELD(t_start_ms) +REFL_END +REFL_TYPE(llama_beam_view) + REFL_FIELD(tokens) +REFL_END + +REFL_TYPE(llama_beams_state) + 
REFL_FIELD(beam_views) +REFL_END + +//REFL_TYPE(ggml_backend) +//REFL_END + +REFL_TYPE(ggml_backend_buffer) +REFL_END + +//REFL_TYPE(ggml_allocr) +//REFL_END + +//REFL_TYPE(ggml_tallocr) +//REFL_END + +//REFL_TYPE(ggml_gallocr) +//REFL_END + + +//REFL_TYPE(llama_buffer) +//REFL_FIELD(data) +//REFL_FIELD(size) +//REFL_END + + +// REFL_TYPE(llama_file) +// REFL_FIELD(fp) +// REFL_FIELD(size) +// REFL_END + + +// REFL_TYPE(llama_mmap) +// REFL_FIELD(addr) +// REFL_FIELD(size) +// REFL_END + + +// REFL_TYPE(llama_mlock) +// REFL_FIELD(addr) +// REFL_FIELD(size) +// REFL_END + +//REFL_TYPE(llama_state) +// REFL_FIELD(log_callback) +// REFL_FIELD(log_callback_user_data) +// REFL_END + + +// REFL_TYPE(llama_hparams) +// REFL_FIELD(vocab_only) +// REFL_FIELD(n_vocab) +// REFL_END + + +//REFL_TYPE(llama_cparams) +// REFL_FIELD(n_ctx) +// REFL_FIELD(n_batch) +//REFL_END + +//REFL_TYPE(llama_layer) +// REFL_FIELD(attn_norm) +// REFL_FIELD(attn_norm_b) +//REFL_END + +// REFL_TYPE(llama_kv_cell) +// REFL_FIELD(pos) +// REFL_FIELD(delta) +// REFL_END + +// REFL_TYPE(llama_kv_cache) +// REFL_FIELD(has_shift) +// REFL_FIELD(head) +// REFL_END + +// REFL_TYPE(llama_vocab) +// REFL_END + +REFL_TYPE(llama_model) +// REFL_FIELD(type) +// REFL_FIELD(arch) +REFL_END + +REFL_TYPE(llama_context) +REFL_END + +// REFL_TYPE(llama_model_loader) +// REFL_FIELD(n_kv) +// REFL_FIELD(n_tensors) +// REFL_END + +// REFL_TYPE(llm_build_context) +// REFL_FIELD(model) +// REFL_FIELD(hparams) +// REFL_END + +// REFL_TYPE(llm_offload_trie) +// REFL_END + +// REFL_TYPE(llm_symbol) +// REFL_FIELD(prev) +// REFL_END + +// REFL_TYPE(llm_bigram_spm) +// REFL_END + +// REFL_TYPE(llm_tokenizer_spm) +// REFL_END + +// REFL_TYPE(llm_bigram_bpe) +// REFL_END + +// REFL_TYPE(llm_tokenizer_bpe) +// REFL_END + + +// REFL_TYPE(fragment_buffer_variant) +// REFL_END + + +// REFL_TYPE(llama_partial_utf8) +// REFL_FIELD(value) +// REFL_FIELD(n_remain) +// REFL_END + + +REFL_TYPE(llama_grammar) +// REFL_FIELD(rules) +// REFL_FIELD(stacks) +REFL_END + + +//REFL_TYPE(llama_grammar_candidate) +// REFL_FIELD(index) +// REFL_FIELD(code_points) +//REFL_END + + +// REFL_TYPE(llama_beam) +// REFL_FIELD(tokens) +// REFL_FIELD(p) +// REFL_END + + +// REFL_TYPE(llama_logit_info) +// REFL_FIELD(logits) +// REFL_FIELD(n_vocab) +// REFL_END + +// REFL_TYPE(llama_beam_search_data) +// REFL_FIELD(ctx) +// REFL_FIELD(n_beams) +// REFL_END + + +// REFL_TYPE(quantize_state_internal) +// REFL_FIELD(model) +// REFL_FIELD(params) +// REFL_END + +// REFL_TYPE(llama_data_context) +// REFL_END + +// REFL_TYPE(llama_data_buffer_context) +// REFL_FIELD(ptr) +// REFL_END + +// REFL_TYPE(llama_data_file_context) +// REFL_FIELD(file) +// REFL_END + // // A simple struct with some fields and a function // // A custom attribute to mark some fields as hidden struct hidden : refl::attr::usage::field {}; diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index a2459a286..a58e55562 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -115,11 +115,11 @@ int main(int argc, char * argv[]) { generate_data(1.0, test_data2.size(), test_data2.data()); // Initialize GGML, ensures float conversion tables are initialized - struct ggml_init_params ggml_params = { + struct ggml_init_params ggml_params( /* .mem_size = */ 1*1024, /* .mem_buffer = */ NULL, - /* .no_alloc = */ true, - }; + /* .no_alloc = */ true + ); struct ggml_context * ctx = ggml_init(ggml_params); int num_failed = 0; diff --git a/tests/test-quantize-perf.cpp 
b/tests/test-quantize-perf.cpp
index 88fac0e23..dccfe087b 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -261,11 +261,11 @@ int main(int argc, char * argv[]) {
 
 
     // Initialize GGML, ensures float conversion tables are initialized
-    struct ggml_init_params ggml_params = {
+    struct ggml_init_params ggml_params(
         /* .mem_size   = */ 1*1024,
         /* .mem_buffer = */ NULL,
-        /* .no_alloc   = */ true,
-    };
+        /* .no_alloc   = */ true
+    );
     struct ggml_context * ctx = ggml_init(ggml_params);
 
     for (int i = 0; i < GGML_TYPE_COUNT; i++) {
diff --git a/tests/test-rope.cpp b/tests/test-rope.cpp
index 26c1f42dc..e1d92cdd4 100644
--- a/tests/test-rope.cpp
+++ b/tests/test-rope.cpp
@@ -124,11 +124,11 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
 }
 
 int main(int /*argc*/, const char ** /*argv*/) {
-    struct ggml_init_params params = {
+    struct ggml_init_params params(
         /* .mem_size   = */ 128*1024*1024,
         /* .mem_buffer = */ NULL,
-        /* .no_alloc   = */ false,
-    };
+        /* .no_alloc   = */ false
+    );
 
     std::vector<uint8_t> work_buffer;
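
Note on the pattern applied throughout this patch: ggml.c is now compiled as C++ (the Makefile's `tt` target runs `clang++ -std=c++17 ggml.cpp`), and C99 designated-initializer aggregates such as `{ .mem_size = ..., .no_alloc = ... }` are not valid C++17, so `ggml_init_params` and `gguf_init_params` gain constructors in ggml.h and every call site passes positional arguments instead; print.hpp then registers the structs with refl-cpp so their fields can be enumerated. The following is a minimal, self-contained sketch of that pattern, assuming refl-cpp is available as <refl.hpp> (print.hpp already depends on it); the names ggml_init_params_sketch, print_all and the main() driver are illustrative stand-ins, not code from the patch.

// Sketch of the constructor + refl-cpp registration pattern used by this patch.
#include <cstddef>
#include <iostream>
#include <refl.hpp>

// Constructor mirrors the one added to ggml_init_params in ggml.h, so call
// sites can pass positional arguments instead of C99 designated initializers.
struct ggml_init_params_sketch {
    ggml_init_params_sketch(size_t mem_size, void * mem_buffer, bool no_alloc)
        : mem_size(mem_size), mem_buffer(mem_buffer), no_alloc(no_alloc) {}

    size_t mem_size;    // bytes
    void * mem_buffer;  // if NULL, memory is allocated internally
    bool   no_alloc;    // don't allocate the tensor data
};

// Reflection registration in the style of print.hpp.
REFL_TYPE(ggml_init_params_sketch)
    REFL_FIELD(mem_size)
    REFL_FIELD(mem_buffer)
    REFL_FIELD(no_alloc)
REFL_END

// Illustrative field printer (the patch removes a similar print_fields<T>()
// declaration from llama.h): walk the registered fields and print name = value.
template <typename T>
void print_all(const T & obj) {
    refl::util::for_each(refl::reflect<T>().members, [&](auto member) {
        std::cout << get_display_name(member) << " = " << member(obj) << "\n";
    });
}

int main() {
    // Same calling convention as the converted call sites, with the old field
    // names kept as comments, e.g. as done in the tests above:
    ggml_init_params_sketch params(
        /* .mem_size   = */ 16u * 1024u,
        /* .mem_buffer = */ nullptr,
        /* .no_alloc   = */ true
    );
    print_all(params);
    return 0;
}

If the build were moved to C++20, in-order designated initializers would compile again and the added constructors would be optional; they are only required while the target stays at -std=c++17.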