From 6fd690fae7f38024492caebb0233b294482e39a7 Mon Sep 17 00:00:00 2001
From: mike dupont
Date: Wed, 22 Nov 2023 09:04:00 -0500
Subject: [PATCH] running

---
 examples/baby-llama/baby-llama.cpp       | 12 +++----
 examples/benchmark/benchmark-matmult.cpp |  6 ++--
 .../convert-llama2c-to-ggml.cpp          |  4 +--
 examples/finetune/finetune.cpp           | 22 ++++++------
 examples/gguf/gguf.cpp                   | 14 ++++----
 examples/llava/clip.cpp                  | 16 ++++-----
 .../train-text-from-scratch.cpp          | 22 ++++++------
 ggml-alloc.cpp                           | 20 ++++++-----
 ggml-backend.cpp                         |  6 ++--
 ggml.cpp                                 | 22 ++++++------
 ggml.h                                   | 33 ++++++++---------
 llama.cpp                                | 36 +++++++++++++------
 12 files changed, 116 insertions(+), 97 deletions(-)

diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 8155101d0..697e003e3 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -1528,9 +1528,9 @@ int main(int argc, char ** argv) {
     for (int ex=0; ex{ "hello", "world", "!" }.data(), 3);
 
     struct ggml_init_params params = {
-        /*.mem_size   =*/ 128ull*1024ull*1024ull,
-        /*.mem_buffer =*/ NULL,
-        /*.no_alloc   =*/ false,
+        .mem_size   = 128ull*1024ull*1024ull,
+        .mem_buffer = NULL,
+        .no_alloc   = false,
     };
 
     struct ggml_context * ctx_data = ggml_init(params);
@@ -87,8 +87,8 @@ static bool gguf_ex_write(const std::string & fname) {
 // just read tensor info
 static bool gguf_ex_read_0(const std::string & fname) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ NULL,
+        .no_alloc = false,
+        .ctx      = NULL,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
@@ -147,8 +147,8 @@ static bool gguf_ex_read_1(const std::string & fname) {
     struct ggml_context * ctx_data = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        .no_alloc = false,
+        .ctx      = &ctx_data,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index fc0656c23..bfd0d6f98 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -256,9 +256,9 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima
     const auto & buf_compute = ctx->buf_compute;
 
     struct ggml_init_params params = {
-        /*.mem_size   =*/ buf_compute.size,
-        /*.mem_buffer =*/ buf_compute.data,
-        /*.no_alloc   =*/ false,
+        .mem_size   = buf_compute.size,
+        .mem_buffer = buf_compute.data,
+        .no_alloc   = false,
     };
 
     params.no_alloc = true;
@@ -456,8 +456,8 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     struct ggml_context * meta = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ &meta,
+        .no_alloc = true,
+        .ctx      = &meta,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname, params);
@@ -553,9 +553,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     // load tensors
     {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ ctx_size,
-            /*.mem_buffer =*/ NULL,
-            /*.no_alloc   =*/ false,
+            .mem_size   = ctx_size,
+            .mem_buffer = NULL,
+            .no_alloc   = false,
         };
 
         new_clip->ctx = ggml_init(params);
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index f605f25a1..f7f3792f2 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -601,8 +601,8 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo
     // set vocab by copying from vocab_model gguf file
     {
         struct gguf_init_params params = {
-            /*.no_alloc = */ false,
-            /*.ctx      = */ NULL,
+            .no_alloc = false,
+            .ctx      = NULL,
         };
 
         struct gguf_context * vctx = gguf_init_from_file(fn_vocab_model, params);
@@ -1086,9 +1086,9 @@ int main(int argc, char ** argv) {
 
     // context for input tensors without their data
     struct ggml_init_params ctx_input_params = {
-        ggml_tensor_overhead() * 2, // mem_size
-        NULL,                       // mem_buffer
-        true,                       // no_alloc
+        .mem_size   = ggml_tensor_overhead() * 2, // mem_size
+        .mem_buffer = NULL,                       // mem_buffer
+        .no_alloc   = true,                       // no_alloc
     };
     struct ggml_context * ctx_input = ggml_init(ctx_input_params);
 
@@ -1115,9 +1115,9 @@ int main(int argc, char ** argv) {
         (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
     );
     struct ggml_init_params ctx_compute_params = {
-        estimated_compute_size_wo_data, // mem_size
-        NULL,                           // mem_buffer
-        true,                           // no_alloc
+        .mem_size   = estimated_compute_size_wo_data, // mem_size
+        .mem_buffer = NULL,                           // mem_buffer
+        .no_alloc   = true,                           // no_alloc
     };
     struct ggml_context * ctx_compute = NULL;
 
@@ -1268,9 +1268,9 @@ int main(int argc, char ** argv) {
 
     // context for work buffer
    struct ggml_init_params ctx_work_params = {
-        max_work_size, // mem_size
-        NULL,          // mem_buffer
-        false,         // no_alloc
+        .mem_size   = max_work_size, // mem_size
+        .mem_buffer = NULL,          // mem_buffer
+        .no_alloc   = false,         // no_alloc
    };
    struct ggml_context * ctx_work = ggml_init(ctx_work_params);
 
diff --git a/ggml-alloc.cpp b/ggml-alloc.cpp
index cdfe4caf6..4c0c914d7 100644
--- a/ggml-alloc.cpp
+++ b/ggml-alloc.cpp
@@ -351,15 +351,17 @@ struct ggml_gallocr {
 ggml_gallocr_t ggml_gallocr_new(void) {
     ggml_gallocr_t galloc = (ggml_gallocr_t)malloc(sizeof(struct ggml_gallocr));
 
+    ggml_hash_set hs = { .size = 0, .keys = NULL };
     *galloc = (struct ggml_gallocr) {
-        /*.talloc           = */ NULL,
-        /*.hash_set         = */ {0},
-        /*.hash_values      = */ NULL,
-        /*.hash_values_size = */ 0,
-        /*.hash_allocs      = */ NULL,
-        /*.parse_seq        = */ NULL,
-        /*.parse_seq_len    = */ 0,
+        .talloc           = NULL,
+        .hash_set         = hs,
+        .hash_values      = NULL,
+        .hash_values_size = 0,
+        .hash_allocs      = NULL,
+        .parse_seq        = NULL,
+        .parse_seq_len    = 0,
     };
+    //((*galloc).hash_set)[0] = 0;
 
     return galloc;
 }
@@ -706,8 +708,8 @@ struct ggml_allocr {
 static ggml_allocr_t ggml_allocr_new_impl(ggml_tallocr_t talloc) {
     ggml_allocr_t alloc = (ggml_allocr_t)malloc(sizeof(struct ggml_allocr));
     *alloc = (struct ggml_allocr) {
-        /*.talloc = */ talloc,
-        /*.galloc = */ ggml_gallocr_new(),
+        .talloc = talloc,
+        .galloc = ggml_gallocr_new(),
     };
     return alloc;
 }
diff --git a/ggml-backend.cpp b/ggml-backend.cpp
index 51f7018d1..2a43b1277 100644
--- a/ggml-backend.cpp
+++ b/ggml-backend.cpp
@@ -587,9 +587,9 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g
     sched->n_splits = 0;
 
     struct ggml_init_params params = {
-        /*.mem_size =   */ sizeof(sched->context_buffer),
-        /*.mem_buffer = */ sched->context_buffer,
-        /*.no_alloc =   */ true
+        .mem_size   = sizeof(sched->context_buffer),
+        .mem_buffer = sched->context_buffer,
+        .no_alloc   = true
     };
 
     if (sched->ctx != NULL) {
diff --git a/ggml.cpp b/ggml.cpp
index 963b44e3f..c0ea3704e 100644
--- a/ggml.cpp
+++ b/ggml.cpp
@@ -2,6 +2,8 @@
 //https://github.com/Neargye/magic_enum.git
 #include 
+
+
 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #define _USE_MATH_DEFINES // For M_PI on MSVC
 
@@ -16136,11 +16138,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             // all other threads are finished and spinning
             // do finalize and init here so we don't have synchronize again
             struct ggml_compute_params params = {
-                /*.type  =*/ GGML_TASK_FINALIZE,
-                /*.ith   =*/ 0,
-                /*.nth   =*/ 0,
-                /*.wsize =*/ cplan->work_size,
-                /*.wdata =*/ cplan->work_data,
+                .type  = GGML_TASK_FINALIZE,
+                .ith   = 0,
+                .nth   = 0,
+                .wsize = cplan->work_size,
+                .wdata = cplan->work_data,
             };
 
             if (node_n != -1) {
@@ -16219,11 +16221,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         const int n_tasks = ggml_get_n_tasks(node, n_threads);
 
         struct ggml_compute_params params = {
-            /*.type  =*/ GGML_TASK_COMPUTE,
-            /*.ith   =*/ state->ith,
-            /*.nth   =*/ n_tasks,
-            /*.wsize =*/ cplan->work_size,
-            /*.wdata =*/ cplan->work_data,
+            .type  = GGML_TASK_COMPUTE,
+            .ith   = state->ith,
+            .nth   = n_tasks,
+            .wsize = cplan->work_size,
+            .wdata = cplan->work_data,
         };
 
         if (state->ith < n_tasks) {
diff --git a/ggml.h b/ggml.h
index 6d625fd67..9f5f20da9 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include 
 //
 // GGML Tensor Library
 //
@@ -465,7 +466,7 @@ extern "C" {
     };
 
     // ggml object
-    struct ggml_object {
+    struct ggml_object : refl::attr::usage::type {
         size_t offs;
         size_t size;
 
@@ -479,7 +480,7 @@ extern "C" {
     static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
 
     // n-dimensional tensor
-    struct ggml_tensor {
+    struct ggml_tensor : refl::attr::usage::type {
         enum ggml_type type;
         enum ggml_backend_type backend;
 
@@ -524,7 +525,7 @@ extern "C" {
 
     // the compute plan that needs to be prepared for ggml_graph_compute()
    // since https://github.com/ggerganov/ggml/issues/287
-    struct ggml_cplan {
+    struct ggml_cplan : refl::attr::usage::type {
         size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()`
         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
 
@@ -541,13 +542,13 @@ extern "C" {
         GGML_CGRAPH_EVAL_ORDER_COUNT
     };
 
-    struct ggml_hash_set {
+    struct ggml_hash_set : refl::attr::usage::type {
         size_t size;
         struct ggml_tensor ** keys;
     };
 
     // computation graph
-    struct ggml_cgraph {
+    struct ggml_cgraph : refl::attr::usage::type {
         int size;
         int n_nodes;
         int n_leafs;
@@ -567,7 +568,7 @@ extern "C" {
     };
 
     // scratch buffer
-    struct ggml_scratch {
+    struct ggml_scratch : refl::attr::usage::type {
         size_t offs;
         size_t size;
         void * data;
@@ -579,7 +580,7 @@ extern "C" {
         {}
     };
 
-    struct ggml_init_params {
+    struct ggml_init_params : refl::attr::usage::type {
         // memory pool
         size_t mem_size;   // bytes
         void * mem_buffer; // if NULL, memory will be allocated internally
@@ -597,7 +598,7 @@ extern "C" {
         GGML_TASK_FINALIZE,
     };
 
-    struct ggml_compute_params {
+    struct ggml_compute_params : refl::attr::usage::type {
         enum ggml_task_type type;
 
         // ith = thread index, nth = number of threads
@@ -1835,7 +1836,7 @@ extern "C" {
     //
     // see ggml.c (ggml_opt_default_params) for default values
     //
-    struct ggml_opt_params {
+    struct ggml_opt_params : refl::attr::usage::type {
         enum ggml_opt_type type;
 
         size_t graph_size;
@@ -1865,7 +1866,7 @@ extern "C" {
         int n_gradient_accumulation;
 
         // ADAM parameters
-        struct {
+        struct ggml_adam : refl::attr::usage::type {
             int n_iter;
 
             float sched; // schedule multiplier (fixed, decay or warmup)
@@ -1881,7 +1882,7 @@ extern "C" {
         } adam;
 
         // LBFGS parameters
-        struct {
+        struct ggml_lbfgs : refl::attr::usage::type {
             int m; // number of corrections to approximate the inv. Hessian
             int n_iter;
             int max_linesearch;
@@ -1896,7 +1897,7 @@ extern "C" {
         } lbfgs;
     };
 
-    struct ggml_opt_context {
+    struct ggml_opt_context : refl::attr::usage::type {
         struct ggml_context * ctx;
         struct ggml_opt_params params;
 
@@ -1908,7 +1909,7 @@ extern "C" {
         float loss_before;
         float loss_after;
 
-        struct {
+        struct ggml_grad : refl::attr::usage::type {
             struct ggml_tensor * g;  // current gradient
             struct ggml_tensor * m;  // first moment
             struct ggml_tensor * v;  // second moment
@@ -1918,7 +1919,7 @@ extern "C" {
             int n_no_improvement;
         } adam;
 
-        struct {
+        struct ggml_params : refl::attr::usage::type {
             struct ggml_tensor * x;  // current parameters
             struct ggml_tensor * xp; // previous parameters
             struct ggml_tensor * g;  // current gradient
@@ -2011,7 +2012,7 @@ extern "C" {
 
     struct gguf_context;
 
-    struct gguf_init_params {
+    struct gguf_init_params : refl::attr::usage::type {
         bool no_alloc;
 
         // if not NULL, create a ggml_context and allocate the tensor data in it
@@ -2148,7 +2149,7 @@ extern "C" {
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
     typedef void (*ggml_vec_dot_t)  (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
 
-    typedef struct {
+    typedef struct ggml_something : refl::attr::usage::type {
         const char * type_name;
         int blck_size;
         size_t type_size;
diff --git a/llama.cpp b/llama.cpp
index c51829c45..6aaaef6c1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1786,8 +1786,8 @@ struct llama_model_loader {
 
     llama_model_loader(const std::string & fname, bool use_mmap) : file(fname.c_str(), "rb") {
         struct gguf_init_params params = {
-            /*.no_alloc = */ true,
-            /*.ctx      = */ &ctx_meta,
+            .no_alloc = true,
+            .ctx      = &ctx_meta,
         };
 
         ctx_gguf = gguf_init_from_file(fname.c_str(), params);
@@ -2677,9 +2677,9 @@ static void llm_load_tensors(
     }
 
     struct ggml_init_params params = {
-        /*.mem_size   =*/ model.buf.size,
-        /*.mem_buffer =*/ model.buf.data,
-        /*.no_alloc   =*/ ml.use_mmap,
+        .mem_size   = model.buf.size,
+        .mem_buffer = model.buf.data,
+        .no_alloc   = ml.use_mmap,
     };
 
     model.ctx = ggml_init(params);
@@ -3843,9 +3843,9 @@ struct llm_build_context {
 
     void init() {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ buf_compute.size,
-            /*.mem_buffer =*/ buf_compute.data,
-            /*.no_alloc   =*/ true,
+            .mem_size   = buf_compute.size,
+            .mem_buffer = buf_compute.data,
+            .no_alloc   = true,
         };
 
         ctx0 = ggml_init(params);
@@ -8427,7 +8427,10 @@ void llama_backend_init(bool numa) {
 
     // needed to initialize f16 tables
     {
-        struct ggml_init_params params = { 0, NULL, false };
+        struct ggml_init_params params = { .mem_size = 0,
+                                           .mem_buffer = NULL,
+                                           .no_alloc = false
+        };
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
     }
@@ -8998,7 +9001,13 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
     if (kv_buf_size) {
         const size_t elt_size = ggml_element_size(kv_self.k);
 
-        ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
+        ggml_init_params ip = {
+            .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+            .mem_buffer = NULL,
+            .no_alloc   = /* no_alloc */ true
+        };
+
+        ggml_context * cpy_ctx = ggml_init(ip);
         ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
 
         ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
@@ -9126,7 +9135,12 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
 
         const size_t elt_size = ggml_element_size(kv_self.k);
 
-        ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
+        ggml_init_params ip {
+            .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+            .mem_buffer = NULL,
+            .no_alloc   = true };
+
+        ggml_context * cpy_ctx = ggml_init(ip);
         ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
 
         ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
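
Nearly every hunk above applies the same mechanical rewrite: the comment-annotated positional initializers that upstream ggml uses (/*.mem_size =*/ value) become real designated initializers (.mem_size = value) now that ggml.cpp, ggml-alloc.cpp, and ggml-backend.cpp are compiled as C++ in this branch. Designated initializers are standard in C99 but only arrived in C++ with C++20, and in C++ the designators must follow the declaration order of the struct, which the hunks above respect. A self-contained sketch of the before/after pattern, using a stand-in my_init_params struct rather than the real ggml_init_params:

// Standalone sketch of the initializer rewrite applied throughout this patch.
// my_init_params is a stand-in with the same fields as ggml_init_params; it is
// not the real ggml type. Compile as C++20, e.g. g++ -std=c++20 demo.cpp
#include <cstddef>
#include <cstdio>

struct my_init_params {
    size_t mem_size;   // bytes
    void * mem_buffer; // if NULL, memory will be allocated internally
    bool   no_alloc;   // don't allocate memory for the tensor data
};

int main() {
    // old style: positional values, field names present only as comments
    struct my_init_params before = {
        /*.mem_size   =*/ 128ull*1024ull*1024ull,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };

    // new style: C++20 designated initializers; the compiler now checks the
    // field/value pairing that the comments used to merely claim, and the
    // designators must appear in declaration order
    struct my_init_params after = {
        .mem_size   = 128ull*1024ull*1024ull,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };

    std::printf("before: %zu bytes, after: %zu bytes\n", before.mem_size, after.mem_size);
    return 0;
}

The remaining non-mechanical changes are small: ggml_gallocr_new() builds its hash_set from a named hs temporary instead of {0}, and the two llama.cpp state-copy paths hoist the anonymous ggml_init argument into a named ip local.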
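The other recurring change is in ggml.h: each public struct gains refl::attr::usage::type as a base class, and a new #include is added at the top of the header, alongside the magic_enum reference in ggml.cpp. refl::attr::usage::type is the marker base that the refl-cpp library uses for attributes applicable to types, so the intent is presumably to expose the ggml structs to compile-time reflection. Below is a minimal sketch of that idea under the assumption that refl-cpp (refl.hpp) is the library in question; demo_params and its REFL_AUTO registration are illustrative only and are not part of the patch.

// Sketch only: assumes the refl-cpp single header (refl.hpp) is on the
// include path. demo_params mirrors the pattern the patch applies in ggml.h.
#include <refl.hpp>
#include <cstddef>
#include <iostream>

struct demo_params : refl::attr::usage::type {
    size_t mem_size;
    bool   no_alloc;
};

// refl-cpp still needs an explicit registration of the members
REFL_AUTO(
    type(demo_params),
    field(mem_size),
    field(no_alloc)
)

int main() {
    // iterate over the registered members and print their names
    refl::util::for_each(refl::reflect<demo_params>().members, [](auto member) {
        std::cout << member.name.c_str() << '\n';
    });
    return 0;
}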