From af50604c6e12f4dc916dc827002c2df431144af2 Mon Sep 17 00:00:00 2001 From: Herman Semenov Date: Thu, 15 Feb 2024 18:05:16 +0300 Subject: [PATCH] ggml : strict definition constantify for C compiler --- ggml.c | 456 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 228 insertions(+), 228 deletions(-) diff --git a/ggml.c b/ggml.c index d921d82fe..9a92d7ce3 100644 --- a/ggml.c +++ b/ggml.c @@ -2511,7 +2511,7 @@ size_t ggml_get_mem_size(const struct ggml_context * ctx) { size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) { size_t max_size = 0; - for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) { + for (const struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) { size_t bytes = ggml_nbytes(tensor); max_size = MAX(max_size, bytes); } @@ -2591,7 +2591,7 @@ static struct ggml_tensor * ggml_new_tensor_impl( enum ggml_type type, int n_dims, const int64_t * ne, - struct ggml_tensor * view_src, + const struct ggml_tensor * view_src, size_t view_offs) { assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS); @@ -2754,7 +2754,7 @@ struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggm return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne); } -static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) { +static void ggml_set_op_params(const struct ggml_tensor * tensor, const void * params, size_t params_size) { GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings assert(params_size <= GGML_MAX_OP_PARAMS); memcpy(tensor->op_params, params, params_size); @@ -2765,7 +2765,7 @@ static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_ return ((const int32_t *)(tensor->op_params))[i]; } -static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) { +static void 
ggml_set_op_params_i32(const struct ggml_tensor * tensor, uint32_t i, int32_t value) { assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t)); ((int32_t *)(tensor->op_params))[i] = value; } @@ -3250,7 +3250,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam while (obj != NULL) { if (obj->type == GGML_OBJECT_TENSOR) { - struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs); + const struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs); if (strcmp(cur->name, name) == 0) { return cur; } @@ -3267,7 +3267,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam // ggml_dup static struct ggml_tensor * ggml_dup_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { bool is_node = false; @@ -3300,7 +3300,7 @@ struct ggml_tensor * ggml_dup_inplace( // ggml_add static struct ggml_tensor * ggml_add_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, bool inplace) { @@ -3341,7 +3341,7 @@ struct ggml_tensor * ggml_add_inplace( // ggml_add_cast static struct ggml_tensor * ggml_add_cast_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, enum ggml_type type) { @@ -3379,7 +3379,7 @@ struct ggml_tensor * ggml_add_cast( // ggml_add1 static struct ggml_tensor * ggml_add1_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, bool inplace) { @@ -3419,7 +3419,7 @@ struct ggml_tensor * ggml_add1_inplace( // ggml_acc static struct ggml_tensor * ggml_acc_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, @@ -3476,7 +3476,7 @@ struct ggml_tensor * ggml_acc_inplace( // ggml_sub static struct ggml_tensor * ggml_sub_impl( - struct ggml_context * ctx, + const struct 
ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, bool inplace) { @@ -3515,7 +3515,7 @@ struct ggml_tensor * ggml_sub_inplace( // ggml_mul static struct ggml_tensor * ggml_mul_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, bool inplace) { @@ -3560,7 +3560,7 @@ struct ggml_tensor * ggml_mul_inplace( // ggml_div static struct ggml_tensor * ggml_div_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, bool inplace) { @@ -3603,7 +3603,7 @@ struct ggml_tensor * ggml_div_inplace( // ggml_sqr static struct ggml_tensor * ggml_sqr_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { bool is_node = false; @@ -3636,7 +3636,7 @@ struct ggml_tensor * ggml_sqr_inplace( // ggml_sqrt static struct ggml_tensor * ggml_sqrt_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { bool is_node = false; @@ -3669,7 +3669,7 @@ struct ggml_tensor * ggml_sqrt_inplace( // ggml_log static struct ggml_tensor * ggml_log_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { bool is_node = false; @@ -4063,7 +4063,7 @@ struct ggml_tensor * ggml_hardsigmoid( // ggml_norm static struct ggml_tensor * ggml_norm_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, float eps, bool inplace) { @@ -4102,7 +4102,7 @@ struct ggml_tensor * ggml_norm_inplace( // ggml_rms_norm static struct ggml_tensor * ggml_rms_norm_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, float eps, bool inplace) { @@ -4166,7 +4166,7 @@ struct ggml_tensor * ggml_rms_norm_back( // ggml_group_norm static struct ggml_tensor * ggml_group_norm_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, int 
n_groups, bool inplace) { @@ -4311,7 +4311,7 @@ struct ggml_tensor * ggml_out_prod( // ggml_scale static struct ggml_tensor * ggml_scale_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, float s, bool inplace) { @@ -4351,7 +4351,7 @@ struct ggml_tensor * ggml_scale_inplace( // ggml_set static struct ggml_tensor * ggml_set_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, @@ -4440,7 +4440,7 @@ struct ggml_tensor * ggml_set_2d_inplace( // ggml_cpy static struct ggml_tensor * ggml_cpy_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b)); @@ -4495,7 +4495,7 @@ struct ggml_tensor * ggml_cast( // ggml_cont static struct ggml_tensor * ggml_cont_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a) { bool is_node = false; @@ -4699,7 +4699,7 @@ struct ggml_tensor * ggml_reshape_4d( } static struct ggml_tensor * ggml_view_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, int n_dims, const int64_t * ne, @@ -4979,7 +4979,7 @@ struct ggml_tensor * ggml_diag( // ggml_diag_mask_inf static struct ggml_tensor * ggml_diag_mask_inf_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, int n_past, bool inplace) { @@ -5018,7 +5018,7 @@ struct ggml_tensor * ggml_diag_mask_inf_inplace( // ggml_diag_mask_zero static struct ggml_tensor * ggml_diag_mask_zero_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, int n_past, bool inplace) { @@ -5057,7 +5057,7 @@ struct ggml_tensor * ggml_diag_mask_zero_inplace( // ggml_soft_max static struct ggml_tensor * ggml_soft_max_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * mask, float 
scale, @@ -5112,7 +5112,7 @@ struct ggml_tensor * ggml_soft_max_ext( // ggml_soft_max_back static struct ggml_tensor * ggml_soft_max_back_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, bool inplace) { @@ -5149,7 +5149,7 @@ struct ggml_tensor * ggml_soft_max_back_inplace( // ggml_rope static struct ggml_tensor * ggml_rope_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int n_dims, @@ -5695,7 +5695,7 @@ struct ggml_tensor * ggml_pool_2d( // ggml_upscale static struct ggml_tensor * ggml_upscale_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, int scale_factor) { bool is_node = false; @@ -6025,7 +6025,7 @@ struct ggml_tensor * ggml_get_rel_pos( // ggml_add_rel_pos static struct ggml_tensor * ggml_add_rel_pos_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * pw, struct ggml_tensor * ph, @@ -6077,7 +6077,7 @@ struct ggml_tensor * ggml_add_rel_pos_inplace( // gmml_unary static struct ggml_tensor * ggml_unary_impl( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_unary_op op, bool inplace) { @@ -6563,7 +6563,7 @@ void ggml_set_param( static void ggml_compute_forward_dup_same_cont( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0)); GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0)); GGML_ASSERT(src0->type == dst->type); @@ -6595,7 +6595,7 @@ static void ggml_compute_forward_dup_same_cont( static void ggml_compute_forward_dup_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_nelements(dst) == 
ggml_nelements(src0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -6866,7 +6866,7 @@ static void ggml_compute_forward_dup_f16( static void ggml_compute_forward_dup_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -7116,7 +7116,7 @@ static void ggml_compute_forward_dup_f32( static void ggml_compute_forward_dup_bytes( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0)); GGML_ASSERT(src0->type == dst->type); @@ -7265,7 +7265,7 @@ static void ggml_compute_forward_dup_bytes( static void ggml_compute_forward_dup( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (src0->type == dst->type) { ggml_compute_forward_dup_bytes(params, src0, dst); return; @@ -7293,7 +7293,7 @@ static void ggml_compute_forward_add_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -7381,7 +7381,7 @@ static void ggml_compute_forward_add_f16_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -7458,7 +7458,7 @@ static void 
ggml_compute_forward_add_f16_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -7512,7 +7512,7 @@ static void ggml_compute_forward_add_q_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -7590,7 +7590,7 @@ static void ggml_compute_forward_add( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -7642,7 +7642,7 @@ static void ggml_compute_forward_add1_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, dst)); GGML_ASSERT(ggml_is_scalar(src1)); @@ -7694,7 +7694,7 @@ static void ggml_compute_forward_add1_f16_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, dst)); GGML_ASSERT(ggml_is_scalar(src1)); @@ -7744,7 +7744,7 @@ static void ggml_compute_forward_add1_f16_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, dst)); 
GGML_ASSERT(ggml_is_scalar(src1)); @@ -7794,7 +7794,7 @@ static void ggml_compute_forward_add1_q_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, dst)); GGML_ASSERT(ggml_is_scalar(src1)); @@ -7861,7 +7861,7 @@ static void ggml_compute_forward_add1( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -7909,7 +7909,7 @@ static void ggml_compute_forward_acc_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, dst)); GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0)); @@ -7991,7 +7991,7 @@ static void ggml_compute_forward_acc( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: @@ -8026,7 +8026,7 @@ static void ggml_compute_forward_sub_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); @@ -8086,7 +8086,7 @@ static void ggml_compute_forward_sub( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8105,7 +8105,7 @@ static void ggml_compute_forward_mul_f32( const struct ggml_compute_params * params, const 
struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -8188,7 +8188,7 @@ static void ggml_compute_forward_mul( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(src1->type == GGML_TYPE_F32 && "only f32 src1 supported for now"); switch (src0->type) { @@ -8209,7 +8209,7 @@ static void ggml_compute_forward_div_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -8282,7 +8282,7 @@ static void ggml_compute_forward_div( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8300,7 +8300,7 @@ static void ggml_compute_forward_div( static void ggml_compute_forward_sqr_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -8324,7 +8324,7 @@ static void ggml_compute_forward_sqr_f32( static void ggml_compute_forward_sqr( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8342,7 +8342,7 @@ static void ggml_compute_forward_sqr( static void ggml_compute_forward_sqrt_f32( const struct 
ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -8366,7 +8366,7 @@ static void ggml_compute_forward_sqrt_f32( static void ggml_compute_forward_sqrt( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8384,7 +8384,7 @@ static void ggml_compute_forward_sqrt( static void ggml_compute_forward_log_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(params->ith == 0); GGML_ASSERT(ggml_are_same_shape(src0, dst)); @@ -8408,7 +8408,7 @@ static void ggml_compute_forward_log_f32( static void ggml_compute_forward_log( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8426,7 +8426,7 @@ static void ggml_compute_forward_log( static void ggml_compute_forward_sum_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_is_scalar(dst)); @@ -8459,7 +8459,7 @@ static void ggml_compute_forward_sum_f32( static void ggml_compute_forward_sum_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_is_scalar(dst)); @@ -8491,7 +8491,7 @@ static void ggml_compute_forward_sum_f16( static void ggml_compute_forward_sum( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8513,7 
+8513,7 @@ static void ggml_compute_forward_sum( static void ggml_compute_forward_sum_rows_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -8546,7 +8546,7 @@ static void ggml_compute_forward_sum_rows_f32( static void ggml_compute_forward_sum_rows( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8564,7 +8564,7 @@ static void ggml_compute_forward_sum_rows( static void ggml_compute_forward_mean_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -8601,7 +8601,7 @@ static void ggml_compute_forward_mean_f32( static void ggml_compute_forward_mean( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8619,7 +8619,7 @@ static void ggml_compute_forward_mean( static void ggml_compute_forward_argmax_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -8647,7 +8647,7 @@ static void ggml_compute_forward_argmax_f32( static void ggml_compute_forward_argmax( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8665,7 +8665,7 @@ static void ggml_compute_forward_argmax( static 
void ggml_compute_forward_repeat_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(params->ith == 0); GGML_ASSERT(ggml_can_repeat(src0, dst)); @@ -8708,7 +8708,7 @@ static void ggml_compute_forward_repeat_f32( static void ggml_compute_forward_repeat_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(params->ith == 0); GGML_ASSERT(ggml_can_repeat(src0, dst)); @@ -8754,7 +8754,7 @@ static void ggml_compute_forward_repeat_f16( static void ggml_compute_forward_repeat( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: case GGML_TYPE_I16: @@ -8778,7 +8778,7 @@ static void ggml_compute_forward_repeat( static void ggml_compute_forward_repeat_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(params->ith == 0); GGML_ASSERT(ggml_can_repeat(dst, src0)); @@ -8835,7 +8835,7 @@ static void ggml_compute_forward_repeat_back_f32( static void ggml_compute_forward_repeat_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8854,7 +8854,7 @@ static void ggml_compute_forward_concat_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; @@ -8921,7 +8921,7 @@ static void ggml_compute_forward_concat( static void ggml_compute_forward_abs_f32( const struct ggml_compute_params * params, 
const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -8945,7 +8945,7 @@ static void ggml_compute_forward_abs_f32( static void ggml_compute_forward_abs( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -8963,7 +8963,7 @@ static void ggml_compute_forward_abs( static void ggml_compute_forward_sgn_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -8987,7 +8987,7 @@ static void ggml_compute_forward_sgn_f32( static void ggml_compute_forward_sgn( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9005,7 +9005,7 @@ static void ggml_compute_forward_sgn( static void ggml_compute_forward_neg_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -9029,7 +9029,7 @@ static void ggml_compute_forward_neg_f32( static void ggml_compute_forward_neg( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9047,7 +9047,7 @@ static void ggml_compute_forward_neg( static void ggml_compute_forward_step_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -9071,7 +9071,7 @@ static void 
ggml_compute_forward_step_f32( static void ggml_compute_forward_step( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9089,7 +9089,7 @@ static void ggml_compute_forward_step( static void ggml_compute_forward_tanh_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -9113,7 +9113,7 @@ static void ggml_compute_forward_tanh_f32( static void ggml_compute_forward_tanh( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9131,7 +9131,7 @@ static void ggml_compute_forward_tanh( static void ggml_compute_forward_elu_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -9155,7 +9155,7 @@ static void ggml_compute_forward_elu_f32( static void ggml_compute_forward_elu( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9173,7 +9173,7 @@ static void ggml_compute_forward_elu( static void ggml_compute_forward_relu_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -9197,7 +9197,7 @@ static void ggml_compute_forward_relu_f32( static void ggml_compute_forward_relu( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct 
ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9215,7 +9215,7 @@ static void ggml_compute_forward_relu( static void ggml_compute_forward_gelu_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0)); GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst)); GGML_ASSERT(ggml_are_same_shape(src0, dst)); @@ -9256,7 +9256,7 @@ static void ggml_compute_forward_gelu_f32( static void ggml_compute_forward_gelu( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9274,7 +9274,7 @@ static void ggml_compute_forward_gelu( static void ggml_compute_forward_gelu_quick_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0)); GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst)); GGML_ASSERT(ggml_are_same_shape(src0, dst)); @@ -9315,7 +9315,7 @@ static void ggml_compute_forward_gelu_quick_f32( static void ggml_compute_forward_gelu_quick( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9333,7 +9333,7 @@ static void ggml_compute_forward_gelu_quick( static void ggml_compute_forward_silu_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0)); GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst)); GGML_ASSERT(ggml_are_same_shape(src0, dst)); @@ -9374,7 +9374,7 @@ static void ggml_compute_forward_silu_f32( static void ggml_compute_forward_silu( const struct ggml_compute_params * params, 
const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9391,7 +9391,7 @@ static void ggml_compute_forward_silu( static void ggml_compute_forward_leaky_relu_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -9418,7 +9418,7 @@ static void ggml_compute_forward_leaky_relu_f32( static void ggml_compute_forward_leaky_relu( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9437,7 +9437,7 @@ static void ggml_compute_forward_silu_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * grad, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad)); GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0)); GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst)); @@ -9482,7 +9482,7 @@ static void ggml_compute_forward_silu_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * grad, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9499,7 +9499,7 @@ static void ggml_compute_forward_silu_back( static void ggml_compute_forward_hardswish_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -9522,7 +9522,7 @@ static void ggml_compute_forward_hardswish_f32( static void ggml_compute_forward_hardswish( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const 
struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9538,7 +9538,7 @@ static void ggml_compute_forward_hardswish( static void ggml_compute_forward_hardsigmoid_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); assert(ggml_are_same_shape(src0, dst)); @@ -9562,7 +9562,7 @@ static void ggml_compute_forward_hardsigmoid_f32( static void ggml_compute_forward_hardsigmoid( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9581,7 +9581,7 @@ static void ggml_compute_forward_hardsigmoid( static void ggml_compute_forward_norm_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -9634,7 +9634,7 @@ static void ggml_compute_forward_norm_f32( static void ggml_compute_forward_norm( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9652,7 +9652,7 @@ static void ggml_compute_forward_norm( static void ggml_compute_forward_rms_norm_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -9702,7 +9702,7 @@ static void ggml_compute_forward_rms_norm_f32( static void ggml_compute_forward_rms_norm( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch 
(src0->type) { case GGML_TYPE_F32: { @@ -9719,7 +9719,7 @@ static void ggml_compute_forward_rms_norm_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -9876,7 +9876,7 @@ static void ggml_compute_forward_rms_norm_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -9894,7 +9894,7 @@ static void ggml_compute_forward_rms_norm_back( static void ggml_compute_forward_group_norm_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -9966,7 +9966,7 @@ static void ggml_compute_forward_group_norm_f32( static void ggml_compute_forward_group_norm( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -10018,7 +10018,7 @@ static void ggml_compute_forward_mul_mat( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { int64_t t0 = ggml_perf_time_us(); UNUSED(t0); @@ -10265,7 +10265,7 @@ static void ggml_compute_forward_mul_mat_id( const struct ggml_compute_params * params, const struct ggml_tensor * ids, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { const struct ggml_tensor * src0 = dst->src[2]; // only 
for GGML_TENSOR_BINARY_OP_LOCALS @@ -10459,7 +10459,7 @@ static void ggml_compute_forward_out_prod_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { // int64_t t0 = ggml_perf_time_us(); // UNUSED(t0); @@ -10651,7 +10651,7 @@ static void ggml_compute_forward_out_prod_q_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { // int64_t t0 = ggml_perf_time_us(); // UNUSED(t0); @@ -10764,7 +10764,7 @@ static void ggml_compute_forward_out_prod( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: @@ -10803,7 +10803,7 @@ static void ggml_compute_forward_out_prod( static void ggml_compute_forward_scale_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); GGML_ASSERT(ggml_is_contiguous(dst)); GGML_ASSERT(ggml_are_same_shape(src0, dst)); @@ -10845,7 +10845,7 @@ static void ggml_compute_forward_scale_f32( static void ggml_compute_forward_scale( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -10864,7 +10864,7 @@ static void ggml_compute_forward_set_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_shape(src0, dst)); GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0)); @@ -10937,7 +10937,7 @@ static void 
ggml_compute_forward_set( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: @@ -10971,7 +10971,7 @@ static void ggml_compute_forward_set( static void ggml_compute_forward_cpy( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { ggml_compute_forward_dup(params, src0, dst); } @@ -10980,7 +10980,7 @@ static void ggml_compute_forward_cpy( static void ggml_compute_forward_cont( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { ggml_compute_forward_dup(params, src0, dst); } @@ -10989,7 +10989,7 @@ static void ggml_compute_forward_cont( static void ggml_compute_forward_reshape( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { // NOP UNUSED(params); UNUSED(src0); @@ -11032,7 +11032,7 @@ static void ggml_compute_forward_get_rows_q( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -11070,7 +11070,7 @@ static void ggml_compute_forward_get_rows_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -11105,7 +11105,7 @@ static void ggml_compute_forward_get_rows_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct 
ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -11140,7 +11140,7 @@ static void ggml_compute_forward_get_rows( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: @@ -11199,7 +11199,7 @@ static void ggml_compute_forward_get_rows_back_f32_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(params->ith == 0); GGML_ASSERT(ggml_is_contiguous(dst)); @@ -11236,7 +11236,7 @@ static void ggml_compute_forward_get_rows_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(params->ith == 0); GGML_ASSERT(ggml_is_contiguous(dst)); @@ -11273,7 +11273,7 @@ static void ggml_compute_forward_get_rows_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { @@ -11313,7 +11313,7 @@ static void ggml_compute_forward_get_rows_back( static void ggml_compute_forward_diag_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -11353,7 +11353,7 @@ static void ggml_compute_forward_diag_f32( static void ggml_compute_forward_diag( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct 
ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -11424,7 +11424,7 @@ static void ggml_compute_forward_diag_mask_f32( static void ggml_compute_forward_diag_mask_inf( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -11440,7 +11440,7 @@ static void ggml_compute_forward_diag_mask_inf( static void ggml_compute_forward_diag_mask_zero( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -11459,7 +11459,7 @@ static void ggml_compute_forward_soft_max_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(ggml_is_contiguous(dst)); assert(ggml_are_same_shape(src0, dst)); @@ -11546,7 +11546,7 @@ static void ggml_compute_forward_soft_max( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -11565,7 +11565,7 @@ static void ggml_compute_forward_soft_max_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); GGML_ASSERT(ggml_is_contiguous(src1)); GGML_ASSERT(ggml_is_contiguous(dst)); @@ -11642,7 +11642,7 @@ static void ggml_compute_forward_soft_max_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -11660,7 +11660,7 @@ static void 
ggml_compute_forward_soft_max_back( static void ggml_compute_forward_alibi_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -11719,7 +11719,7 @@ static void ggml_compute_forward_alibi_f32( static void ggml_compute_forward_alibi_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -11780,7 +11780,7 @@ static void ggml_compute_forward_alibi_f16( static void ggml_compute_forward_alibi( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { @@ -11820,7 +11820,7 @@ static void ggml_compute_forward_alibi( static void ggml_compute_forward_clamp_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { @@ -11860,7 +11860,7 @@ static void ggml_compute_forward_clamp_f32( static void ggml_compute_forward_clamp( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -12295,7 +12295,7 @@ static void ggml_compute_forward_rope( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { @@ -12318,7 +12318,7 @@ static void ggml_compute_forward_rope_back( const struct 
ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { @@ -12341,7 +12341,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src1->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -12438,7 +12438,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT(src1->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -12535,7 +12535,7 @@ static void ggml_compute_forward_conv_transpose_1d( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { @@ -12559,7 +12559,7 @@ static void ggml_compute_forward_im2col_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src1->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -12645,7 +12645,7 @@ static void ggml_compute_forward_im2col_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src1->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == 
GGML_TYPE_F16); @@ -12727,7 +12727,7 @@ static void ggml_compute_forward_im2col( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (dst->type) { case GGML_TYPE_F16: { @@ -12751,7 +12751,7 @@ static void ggml_compute_forward_conv_transpose_2d( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src1->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -12857,7 +12857,7 @@ static void ggml_compute_forward_pool_1d_sk_p0( const enum ggml_op_pool op, const struct ggml_tensor * src, const int k, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { assert(src->type == GGML_TYPE_F32); assert(params->ith == 0); @@ -12907,7 +12907,7 @@ static void ggml_compute_forward_pool_1d_sk_p0( static void ggml_compute_forward_pool_1d( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { const int32_t * opts = (const int32_t *)dst->op_params; enum ggml_op_pool op = opts[0]; @@ -12925,7 +12925,7 @@ static void ggml_compute_forward_pool_1d( static void ggml_compute_forward_pool_2d( const struct ggml_compute_params * params, const struct ggml_tensor * src, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(src->type == GGML_TYPE_F32); GGML_ASSERT(params->ith == 0); @@ -12999,7 +12999,7 @@ static void ggml_compute_forward_pool_2d( static void ggml_compute_forward_upscale_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; @@ -13038,7 +13038,7 @@ static void 
ggml_compute_forward_upscale_f32( static void ggml_compute_forward_upscale( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -13056,7 +13056,7 @@ static void ggml_compute_forward_upscale( static void ggml_compute_forward_pad_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; @@ -13096,7 +13096,7 @@ static void ggml_compute_forward_pad_f32( static void ggml_compute_forward_pad( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -13114,7 +13114,7 @@ static void ggml_compute_forward_pad( static void ggml_compute_forward_argsort_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; @@ -13156,7 +13156,7 @@ static void ggml_compute_forward_argsort_f32( static void ggml_compute_forward_argsort( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: @@ -13178,7 +13178,7 @@ static void ggml_compute_forward_flash_attn_f32( const struct ggml_tensor * k, const struct ggml_tensor * v, const bool masked, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { int64_t t0 = ggml_perf_time_us(); UNUSED(t0); @@ -13368,7 +13368,7 @@ static void ggml_compute_forward_flash_attn_f16( const struct ggml_tensor * k, const struct ggml_tensor * v, const bool masked, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { 
int64_t t0 = ggml_perf_time_us(); UNUSED(t0); @@ -13594,7 +13594,7 @@ static void ggml_compute_forward_flash_attn( const struct ggml_tensor * k, const struct ggml_tensor * v, const bool masked, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (q->type) { case GGML_TYPE_F16: { @@ -13620,7 +13620,7 @@ static void ggml_compute_forward_flash_ff_f16( const struct ggml_tensor * b1, // F32 fc_b const struct ggml_tensor * c0, // F16 proj_w const struct ggml_tensor * c1, // F32 proj_b - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { int64_t t0 = ggml_perf_time_us(); UNUSED(t0); @@ -13753,7 +13753,7 @@ static void ggml_compute_forward_flash_ff( const struct ggml_tensor * b1, const struct ggml_tensor * c0, const struct ggml_tensor * c1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (b0->type) { case GGML_TYPE_F16: { @@ -13779,7 +13779,7 @@ static void ggml_compute_forward_flash_attn_back_f32( const struct ggml_tensor * v, const struct ggml_tensor * d, const bool masked, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { int64_t t0 = ggml_perf_time_us(); UNUSED(t0); @@ -14132,7 +14132,7 @@ static void ggml_compute_forward_flash_attn_back( const struct ggml_tensor * v, const struct ggml_tensor * d, const bool masked, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (q->type) { case GGML_TYPE_F32: { @@ -14150,7 +14150,7 @@ static void ggml_compute_forward_flash_attn_back( static void ggml_compute_forward_win_part_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } @@ -14194,7 +14194,7 @@ static void ggml_compute_forward_win_part_f32( static void ggml_compute_forward_win_part( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct 
ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -14212,7 +14212,7 @@ static void ggml_compute_forward_win_part( static void ggml_compute_forward_win_unpart_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } @@ -14254,7 +14254,7 @@ static void ggml_compute_forward_win_unpart_f32( static void ggml_compute_forward_win_unpart( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -14272,7 +14272,7 @@ static void ggml_compute_forward_win_unpart( static void ggml_compute_forward_unary( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { const enum ggml_unary_op op = ggml_get_unary_op(dst); switch (op) { @@ -14336,7 +14336,7 @@ static void ggml_compute_forward_unary( static void ggml_compute_forward_get_rel_pos_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } @@ -14363,7 +14363,7 @@ static void ggml_compute_forward_get_rel_pos_f16( static void ggml_compute_forward_get_rel_pos( const struct ggml_compute_params * params, const struct ggml_tensor * src0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { @@ -14383,7 +14383,7 @@ static void ggml_compute_forward_add_rel_pos_f32( const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * src2, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { const bool inplace = (bool) ((int32_t *) dst->op_params)[0]; if (!inplace && 
params->type == GGML_TASK_INIT) { @@ -14451,7 +14451,7 @@ static void ggml_compute_forward_add_rel_pos( const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * src2, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -14611,7 +14611,7 @@ static void ggml_compute_forward_map_custom3_f32( static void ggml_compute_forward_map_custom1( const struct ggml_compute_params * params, const struct ggml_tensor * a, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } @@ -14627,7 +14627,7 @@ static void ggml_compute_forward_map_custom2( const struct ggml_compute_params * params, const struct ggml_tensor * a, const struct ggml_tensor * b, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } @@ -14644,7 +14644,7 @@ static void ggml_compute_forward_map_custom3( const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } @@ -14660,7 +14660,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); GGML_ASSERT(ggml_is_contiguous(src1)); GGML_ASSERT(ggml_is_scalar(dst)); @@ -14766,7 +14766,7 @@ static void ggml_compute_forward_cross_entropy_loss( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -14786,7 +14786,7 @@ static void 
ggml_compute_forward_cross_entropy_loss_back_f32( const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * opt0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(dst)); GGML_ASSERT(ggml_is_contiguous(src0)); GGML_ASSERT(ggml_is_contiguous(src1)); @@ -14876,7 +14876,7 @@ static void ggml_compute_forward_cross_entropy_loss_back( const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * opt0, - struct ggml_tensor * dst) { + const struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { @@ -14891,7 +14891,7 @@ static void ggml_compute_forward_cross_entropy_loss_back( ///////////////////////////////// -static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) { +static void ggml_compute_forward(struct ggml_compute_params * params, const struct ggml_tensor * tensor) { GGML_ASSERT(params); if (tensor->op == GGML_OP_NONE) { @@ -15358,7 +15358,7 @@ static void ggml_hash_map_free(struct hash_map * map) { // gradient checkpointing static struct ggml_tensor * ggml_recompute_graph_node( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_cgraph * graph, struct hash_map * replacements, struct ggml_tensor * node) { @@ -16142,7 +16142,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_FLASH_ATTN: { - struct ggml_tensor * flash_grad = NULL; + const struct ggml_tensor * flash_grad = NULL; if (src0->grad || src1->grad || tensor->src[2]->grad) { int32_t t = ggml_get_op_params_i32(tensor, 0); GGML_ASSERT(t == 0 || t == 1); @@ -16324,7 +16324,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } } -static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) { +static void ggml_visit_parents(struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { if (node->grad == NULL) 
{ // this usually happens when we generate intermediate nodes from constants in the backward pass // it can also happen during forward pass, if the user performs computations with constants @@ -16421,7 +16421,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * } for (int i = gf->n_nodes - 1; i >= 0; i--) { - struct ggml_tensor * node = gf->nodes[i]; + const struct ggml_tensor * node = gf->nodes[i]; // inplace operations to add gradients are not created by ggml_compute_backward // use allocator to automatically make inplace operations @@ -16431,7 +16431,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * } for (int i = 0; i < gf->n_nodes; i++) { - struct ggml_tensor * node = gf->nodes[i]; + const struct ggml_tensor * node = gf->nodes[i]; if (node->flags & GGML_TENSOR_FLAG_PARAM) { GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node); @@ -16724,7 +16724,7 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const node->perf_time_us += time_us_cur; } -static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { +static int ggml_get_n_tasks(const struct ggml_tensor * node, int n_threads) { int n_tasks = 0; switch (node->op) { @@ -16893,7 +16893,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { } break; case GGML_OP_MAP_CUSTOM1: { - struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params; + const struct ggml_map_custom1_op_params * p = (const struct ggml_map_custom1_op_params *) node->op_params; if (p->n_tasks == GGML_N_TASKS_MAX) { n_tasks = n_threads; } else { @@ -16902,7 +16902,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { } break; case GGML_OP_MAP_CUSTOM2: { - struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params; + const struct ggml_map_custom2_op_params * p = (const struct ggml_map_custom2_op_params *) 
node->op_params; if (p->n_tasks == GGML_N_TASKS_MAX) { n_tasks = n_threads; } else { @@ -16911,7 +16911,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { } break; case GGML_OP_MAP_CUSTOM3: { - struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params; + const struct ggml_map_custom3_op_params * p = (const struct ggml_map_custom3_op_params *) node->op_params; if (p->n_tasks == GGML_N_TASKS_MAX) { n_tasks = n_threads; } else { @@ -17011,7 +17011,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { if (node_n != -1) { /* FINALIZE */ - struct ggml_tensor * node = cgraph->nodes[node_n]; + const struct ggml_tensor * node = cgraph->nodes[node_n]; if (GGML_OP_HAS_FINALIZE[node->op]) { params.nth = ggml_get_n_tasks(node, n_threads); ggml_compute_forward(¶ms, node); @@ -17022,7 +17022,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { // distribute new work or execute it direct if 1T while (++node_n < cgraph->n_nodes) { GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes); - struct ggml_tensor * node = cgraph->nodes[node_n]; + const struct ggml_tensor * node = cgraph->nodes[node_n]; const int n_tasks = ggml_get_n_tasks(node, n_threads); state->shared->perf_node_start_cycles = ggml_perf_cycles(); @@ -17070,7 +17070,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { if (node_n >= cgraph->n_nodes) break; /* INIT & COMPUTE */ - struct ggml_tensor * node = cgraph->nodes[node_n]; + const struct ggml_tensor * node = cgraph->nodes[node_n]; const int n_tasks = ggml_get_n_tasks(node, n_threads); struct ggml_compute_params params = { @@ -17134,7 +17134,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa // thread scheduling for the different operations + work buffer size estimation for (int i = 0; i < cgraph->n_nodes; i++) { - struct ggml_tensor * node = cgraph->nodes[i]; + const struct ggml_tensor * node = cgraph->nodes[i]; const 
int n_tasks = ggml_get_n_tasks(node, n_threads); @@ -17421,7 +17421,7 @@ void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) { for (int i = 0; i < cgraph->n_leafs; i++) { - struct ggml_tensor * leaf = cgraph->leafs[i]; + struct ggml_tensor * leaf = cgraph->leafs[i]; if (strcmp(leaf->name, name) == 0) { return leaf; @@ -17429,7 +17429,7 @@ struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const ch } for (int i = 0; i < cgraph->n_nodes; i++) { - struct ggml_tensor * node = cgraph->nodes[i]; + struct ggml_tensor * node = cgraph->nodes[i]; if (strcmp(node->name, name) == 0) { return node; @@ -17600,7 +17600,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) { // output the op arguments { - struct ggml_tensor * args[GGML_MAX_SRC] = { NULL }; + const struct ggml_tensor * args[GGML_MAX_SRC] = { NULL }; for (int j = 0; j < GGML_MAX_SRC; ++j) { args[j] = tensor->src[j]; @@ -17911,7 +17911,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes); for (int i = 0; i < cgraph->n_nodes; i++) { - struct ggml_tensor * node = cgraph->nodes[i]; + const struct ggml_tensor * node = cgraph->nodes[i]; perf_total_per_op_us[node->op] += MAX(1, node->perf_time_us); @@ -17927,7 +17927,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { GGML_PRINT("n_leafs = %d\n", cgraph->n_leafs); for (int i = 0; i < cgraph->n_leafs; i++) { - struct ggml_tensor * node = cgraph->leafs[i]; + const struct ggml_tensor * node = cgraph->leafs[i]; GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n", i, @@ -17964,7 +17964,7 @@ static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { for (int i = 0; i <
cgraph->n_nodes; i++) { - struct ggml_tensor * parent = cgraph->nodes[i]; + struct ggml_tensor * parent = cgraph->nodes[i]; if (parent->grad == node) { return parent; @@ -17974,7 +17974,7 @@ static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgr return NULL; } -static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) { +static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, const struct ggml_tensor * node, const struct ggml_tensor * parent, const char * label) { struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node); struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent); fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"%s\"; ]\n", @@ -17987,14 +17987,14 @@ static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * label); } -static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) { +static void ggml_graph_dump_dot_leaf_edge(FILE * fp, const struct ggml_tensor * node, const struct ggml_tensor * parent, const char * label) { fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"%s\"; ]\n", (void *) parent, "x", (void *) node, "x", label); } -void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) { +void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) { char color[16]; FILE * fp = fopen(filename, "w"); @@ -18005,7 +18005,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph fprintf(fp, " rankdir = LR;\n"); for (int i = 0; i < gb->n_nodes; i++) { - struct ggml_tensor * node = gb->nodes[i]; + const struct ggml_tensor * node = gb->nodes[i]; if (ggml_graph_get_parent(gb, node) != NULL) { continue; @@ -18048,7 -18048,7
@@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph } for (int i = 0; i < gb->n_leafs; i++) { - struct ggml_tensor * node = gb->leafs[i]; + const struct ggml_tensor * node = gb->leafs[i]; snprintf(color, sizeof(color), "pink"); @@ -18086,7 +18086,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph } for (int i = 0; i < gb->n_nodes; i++) { - struct ggml_tensor * node = gb->nodes[i]; + const struct ggml_tensor * node = gb->nodes[i]; for (int j = 0; j < GGML_MAX_SRC; j++) { if (node->src[j]) { @@ -18098,7 +18098,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph } for (int i = 0; i < gb->n_leafs; i++) { - struct ggml_tensor * node = gb->leafs[i]; + const struct ggml_tensor * node = gb->leafs[i]; for (int j = 0; j < GGML_MAX_SRC; j++) { if (node->src[j]) { @@ -18169,12 +18169,12 @@ static void ggml_opt_acc_grad(int np, struct ggml_tensor * const ps[], float * g // static enum ggml_opt_result ggml_opt_adam( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_opt_context * opt, - struct ggml_opt_params params, - struct ggml_tensor * f, - struct ggml_cgraph * gf, - struct ggml_cgraph * gb, + const struct ggml_opt_params params, + const struct ggml_tensor * f, + const struct ggml_cgraph * gf, + const struct ggml_cgraph * gb, ggml_opt_callback callback, void * callback_data) { GGML_ASSERT(ggml_is_scalar(f)); @@ -18411,8 +18411,8 @@ static enum ggml_opt_result linesearch_backtracking( float * d, float * step, const float * xp, - struct ggml_tensor * f, - struct ggml_cgraph * gb, + const struct ggml_tensor * f, + const struct ggml_cgraph * gb, struct ggml_cplan * cplan, const int np, struct ggml_tensor * ps[], @@ -18525,12 +18525,12 @@ static enum ggml_opt_result linesearch_backtracking( } static enum ggml_opt_result ggml_opt_lbfgs( - struct ggml_context * ctx, + const struct ggml_context * ctx, struct ggml_opt_context * opt, - struct ggml_opt_params 
params, - struct ggml_tensor * f, - struct ggml_cgraph * gf, - struct ggml_cgraph * gb, + const struct ggml_opt_params params, + const struct ggml_tensor * f, + const struct ggml_cgraph * gf, + const struct ggml_cgraph * gb, ggml_opt_callback callback, void * callback_data) { if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE || @@ -18866,7 +18866,7 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) { GGML_API void ggml_opt_init( struct ggml_context * ctx, struct ggml_opt_context * opt, - struct ggml_opt_params params, + const struct ggml_opt_params params, int64_t nx) { opt->ctx = ctx; opt->params = params; @@ -18976,10 +18976,10 @@ enum ggml_opt_result ggml_opt_resume( struct ggml_tensor * f) { // build forward + backward compute graphs - struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, opt->params.graph_size, true); + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, opt->params.graph_size, true); ggml_build_forward_expand(gf, f); - struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf); + struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf); ggml_build_backward_expand(ctx, gf, gb, true); return ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL); @@ -19757,7 +19757,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p *params.ctx = ggml_init(pdata); - struct ggml_context * ctx_data = *params.ctx; + const struct ggml_context * ctx_data = *params.ctx; struct ggml_tensor * data = NULL;