diff --git a/ggml/include/ggml-alloc.h b/ggml/include/ggml-alloc.h
index 23600eea9..a861daa53 100644
--- a/ggml/include/ggml-alloc.h
+++ b/ggml/include/ggml-alloc.h
@@ -46,17 +46,17 @@ GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, st
 typedef struct ggml_gallocr * ggml_gallocr_t;
 
 GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
-GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
+GGML_API ggml_gallocr_t ggml_gallocr_new_n(const ggml_backend_buffer_type_t * bufts, int n_bufs);
 GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
 
 // pre-allocate buffers from a measure graph - does not allocate or modify the graph
 // call with a worst-case graph to avoid buffer reallocations
 // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
 // returns false if the buffer allocation failed
-GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
+GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, const struct ggml_cgraph * graph);
 GGML_API bool ggml_gallocr_reserve_n(
     ggml_gallocr_t galloc,
-    struct ggml_cgraph * graph,
+    const struct ggml_cgraph * graph,
     const int * node_buffer_ids,
     const int * leaf_buffer_ids);
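
Note: the ggml-alloc.h changes are pure const-tightening; as the comment above says, `ggml_gallocr_reserve` only measures the graph. A minimal sketch of the reserve-then-allocate pattern the comment describes (the CPU buffer type is just an example; `build_worst_case_graph` and `build_graph` are hypothetical application helpers):

    // Measure once against a worst-case graph, then allocate per-iteration
    // graphs without triggering buffer reallocation.
    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());

    struct ggml_cgraph * worst = build_worst_case_graph(ctx); // hypothetical helper
    if (!ggml_gallocr_reserve(galloc, worst)) {               // graph is only read, hence const
        fprintf(stderr, "buffer allocation failed\n");
    }

    struct ggml_cgraph * gf = build_graph(ctx);               // hypothetical per-iteration graph
    ggml_gallocr_alloc_graph(galloc, gf);                     // fits within the reserved buffers
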
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 5bd8d9c8b..7e8c4c2c2 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -698,7 +698,7 @@ extern "C" {
 
     GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
 
-    GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
+    GGML_API bool ggml_get_no_alloc(const struct ggml_context * ctx);
     GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
 
     GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
@@ -745,7 +745,7 @@ extern "C" {
     // Context tensor enumeration and lookup
     GGML_API struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx);
     GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
+    GGML_API struct ggml_tensor * ggml_get_tensor(const struct ggml_context * ctx, const char * name);
 
     // Converts a flat index into coordinates
     GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
@@ -763,7 +763,7 @@ extern "C" {
     // Tensor flags
     GGML_API void ggml_set_input(struct ggml_tensor * tensor);
     GGML_API void ggml_set_output(struct ggml_tensor * tensor);
-    GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API void ggml_set_param(const struct ggml_context * ctx, struct ggml_tensor * tensor);
     GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
 
     //
@@ -927,13 +927,13 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_repeat(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            struct ggml_tensor * b);
+            const struct ggml_tensor * b);
 
     // sums repetitions in a into shape of b
     GGML_API struct ggml_tensor * ggml_repeat_back(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            struct ggml_tensor * b);
+            const struct ggml_tensor * b);
 
     // concat a and b along dim
     // used in stable-diffusion
@@ -1243,7 +1243,7 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_reshape(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            struct ggml_tensor * b);
+            const struct ggml_tensor * b);
 
     // return view(a)
     // TODO: when we start computing gradient, make a copy instead of view
@@ -1335,7 +1335,7 @@
             struct ggml_context * ctx,
             struct ggml_tensor * a,  // gradients of ggml_get_rows result
             struct ggml_tensor * b,  // row indices
-            struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
+            const struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
 
     GGML_API struct ggml_tensor * ggml_diag(
             struct ggml_context * ctx,
@@ -1563,7 +1563,7 @@
             struct ggml_context * ctx,
             struct ggml_tensor * a,  // convolution kernel
            struct ggml_tensor * b,  // gradient of im2col output
-            int64_t * ne,            // shape of im2col input
+            const int64_t * ne,      // shape of im2col input
             int s0,                  // stride dimension 0
             int s1,                  // stride dimension 1
             int p0,                  // padding dimension 0
@@ -2062,15 +2062,16 @@ extern "C" {
 
     // graph allocation in a context
     GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
     GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
-    GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-    GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
-    GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
+    GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, const struct ggml_cgraph * cgraph);
+    GGML_API void ggml_graph_cpy (const struct ggml_cgraph * src, struct ggml_cgraph * dst);
+    GGML_API void ggml_graph_reset (
+        const struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
     GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
 
-    GGML_API int ggml_graph_size (struct ggml_cgraph * cgraph);
-    GGML_API struct ggml_tensor * ggml_graph_node (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
-    GGML_API struct ggml_tensor ** ggml_graph_nodes (struct ggml_cgraph * cgraph);
-    GGML_API int ggml_graph_n_nodes(struct ggml_cgraph * cgraph);
+    GGML_API int ggml_graph_size (const struct ggml_cgraph * cgraph);
+    GGML_API struct ggml_tensor * ggml_graph_node (const struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
+    GGML_API struct ggml_tensor ** ggml_graph_nodes (const struct ggml_cgraph * cgraph);
+    GGML_API int ggml_graph_n_nodes(const struct ggml_cgraph * cgraph);
 
     GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
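
Note: the ggml.h hunks are the public face of the cleanup: read-only accessors such as `ggml_graph_size`, `ggml_graph_node`, and `ggml_graph_n_nodes` now accept a graph they cannot modify. A small sketch of walking a graph through these accessors, including the negative indexing documented above (assumes `gf` was built elsewhere; only valid after this patch, since the parameter is now const):

    #include <cstdio>
    #include "ggml.h"

    static void dump_graph(const struct ggml_cgraph * gf) {
        for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
            struct ggml_tensor * t = ggml_graph_node(gf, i);
            printf("%3d: %s (%s)\n", i, t->name, ggml_op_name(t->op));
        }
        // i < 0 indexes from the back: -1 is nodes[n_nodes - 1]
        struct ggml_tensor * last = ggml_graph_node(gf, -1);
        printf("last: %s\n", last->name);
    }
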
diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index 9a3bf9f29..c32645ad8 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -377,7 +377,7 @@ struct ggml_gallocr {
     int n_leafs;
 };
 
-ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs) {
+ggml_gallocr_t ggml_gallocr_new_n(const ggml_backend_buffer_type_t * bufts, int n_bufs) {
     ggml_gallocr_t galloc = (ggml_gallocr_t)calloc(1, sizeof(struct ggml_gallocr));
     GGML_ASSERT(galloc != NULL);
 
@@ -563,7 +563,7 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {
     return node_buffer_ids ? node_buffer_ids[i] : 0;
 }
 
-static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, const struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
     // clear hash tables
     ggml_hash_set_reset(&galloc->hash_set);
     memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
@@ -670,7 +670,7 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
     }
 }
 
-bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, const struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
     size_t min_hash_size = graph->n_nodes + graph->n_leafs;
     // add 25% margin to avoid hash collisions
     min_hash_size += min_hash_size / 4;
@@ -780,11 +780,11 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
     return true;
 }
 
-bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
+bool ggml_gallocr_reserve(ggml_gallocr_t galloc, const struct ggml_cgraph *graph) {
     return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
 }
 
-static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, struct tensor_alloc * tensor_alloc) {
+static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, const struct tensor_alloc * tensor_alloc) {
     int buffer_id = tensor_alloc->buffer_id;
     assert(tensor->data || tensor->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
 
@@ -813,7 +813,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
     }
 }
 
-static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
+static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, const struct tensor_alloc * talloc) {
     size_t node_size = 0;
     if (!node->data && !node->view_src) {
         GGML_ASSERT(talloc->buffer_id >= 0); // prevent segfault when misusing the API
@@ -822,7 +822,7 @@ static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_t
     return talloc->size_max >= node_size;
 }
 
-static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph * graph) {
+static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, const struct ggml_cgraph * graph) {
     if (galloc->n_nodes != graph->n_nodes) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: graph has different number of nodes\n", __func__);
@@ -933,8 +933,8 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
 
 // utils
 
-static bool alloc_tensor_range(struct ggml_context * ctx,
-        struct ggml_tensor * first, struct ggml_tensor * last,
+static bool alloc_tensor_range(const struct ggml_context * ctx,
+        struct ggml_tensor * first, const struct ggml_tensor * last,
         ggml_backend_buffer_type_t buft, size_t size,
         ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
 
     ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
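
For context on the functions made const here: the sizing in `ggml_gallocr_reserve_n` is plain arithmetic over a graph it never writes to. A worked example of the 25% collision margin visible in the hunk above (the numbers are illustrative):

    // For a graph with 1024 nodes and 256 leafs:
    size_t min_hash_size = 1024 + 256;  // one slot per node and leaf -> 1280
    min_hash_size += min_hash_size / 4; // +25% collision margin      -> 1600
    // ggml_hash_size() then rounds this up to the table size actually used.
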
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 955ed505f..348d7c21f 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -124,7 +124,7 @@ static void * dl_get_sym(dl_handle * handle, const char * name) {
 using dl_handle = void;
 
 struct dl_handle_deleter {
-    void operator()(void * handle) {
+    void operator()(void * handle) const {
         dlclose(handle);
     }
 };
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
index eab017889..81c4b0a3e 100644
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@@ -188,13 +188,13 @@ struct ggml_hash_set {
 };
 
 struct ggml_hash_set ggml_hash_set_new(size_t size);
-void ggml_hash_set_free(struct ggml_hash_set * hash_set);
+void ggml_hash_set_free(const struct ggml_hash_set * hash_set);
 
 // returns the minimum size for a hash set that can hold min_sz elements
 size_t ggml_hash_size(size_t min_sz);
 
 // remove all elements from the hash set
-void ggml_hash_set_reset(struct ggml_hash_set * hash_set);
+void ggml_hash_set_reset(const struct ggml_hash_set * hash_set);
 
 // returns true if key is in the hash set
 static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
@@ -302,7 +302,7 @@ struct ggml_cgraph {
 // returns a slice of cgraph with nodes [i0, i1)
 // the slice does not have leafs or gradients
 // if you need the gradients, get them from the original graph
-struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
+struct ggml_cgraph ggml_graph_view(const struct ggml_cgraph * cgraph, int i0, int i1);
 
 // Memory allocation
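
Note: the deleter change is standard functor hygiene: a stateless deleter never mutates itself, so `operator()` can be const, which also lets it be invoked through a const reference. A self-contained sketch of why that matters (dlclose is stubbed out so the snippet compiles without <dlfcn.h>):

    #include <memory>

    using dl_handle = void;

    struct dl_handle_deleter {
        void operator()(void * handle) const {
            // dlclose(handle); // the real deleter closes the shared library
            (void) handle;
        }
    };

    // as in ggml-backend-reg.cpp
    using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;

    void close_early(const dl_handle_ptr & p, void * raw) {
        // get_deleter() on a const unique_ptr returns a const deleter;
        // this call compiles only because operator() is const-qualified.
        p.get_deleter()(raw);
    }
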
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 3b4861542..ea9cd04d4 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -1478,7 +1478,7 @@ size_t ggml_used_mem(const struct ggml_context * ctx) {
     return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
 }
 
-bool ggml_get_no_alloc(struct ggml_context * ctx) {
+bool ggml_get_no_alloc(const struct ggml_context * ctx) {
     return ctx->no_alloc;
 }
 
@@ -1789,7 +1789,7 @@ struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struc
     return NULL;
 }
 
-struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
+struct ggml_tensor * ggml_get_tensor(const struct ggml_context * ctx, const char * name) {
     struct ggml_object * obj = ctx->objects_begin;
 
     char * const mem_buffer = ctx->mem_buffer;
@@ -1952,7 +1952,7 @@ static struct ggml_tensor * ggml_acc_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    const int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_ACC;
@@ -2292,8 +2292,8 @@ struct ggml_tensor * ggml_count_equal(
 
 struct ggml_tensor * ggml_repeat(
         struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b) {
+        struct ggml_tensor * a,
+        const struct ggml_tensor * b) {
     GGML_ASSERT(ggml_can_repeat(a, b));
 
     struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
@@ -2308,8 +2308,8 @@ struct ggml_tensor * ggml_repeat_back(
         struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b) {
+        struct ggml_tensor * a,
+        const struct ggml_tensor * b) {
     GGML_ASSERT(ggml_can_repeat(b, a));
 
     struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
@@ -2836,7 +2836,7 @@ static struct ggml_tensor * ggml_set_impl(
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
     GGML_ASSERT(offset < (size_t)(1 << 30));
-    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    const int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_SET;
@@ -3014,7 +3014,7 @@ struct ggml_tensor * ggml_cont_4d(
 struct ggml_tensor * ggml_reshape(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        struct ggml_tensor * b) {
+        const struct ggml_tensor * b) {
     GGML_ASSERT(ggml_is_contiguous(a));
     // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
     GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
@@ -3247,7 +3247,7 @@ struct ggml_tensor * ggml_permute(
     result->op = GGML_OP_PERMUTE;
     result->src[0] = a;
 
-    int32_t params[] = { axis0, axis1, axis2, axis3 };
+    const int32_t params[] = { axis0, axis1, axis2, axis3 };
     ggml_set_op_params(result, params, sizeof(params));
 
     return result;
@@ -3302,8 +3302,8 @@ struct ggml_tensor * ggml_get_rows(
 struct ggml_tensor * ggml_get_rows_back(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        struct ggml_tensor * b,
-        struct ggml_tensor * c) {
+        struct ggml_tensor * b,
+        const struct ggml_tensor * c) {
     GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
     GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
 
@@ -3343,7 +3343,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
         bool inplace) {
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { n_past };
+    const int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_DIAG_MASK_INF;
@@ -3375,7 +3375,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
         bool inplace) {
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { n_past };
+    const int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_DIAG_MASK_ZERO;
@@ -3423,7 +3423,7 @@ static struct ggml_tensor * ggml_soft_max_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    float params[] = { scale, max_bias };
+    const float params[] = { scale, max_bias };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_SOFT_MAX;
@@ -3758,7 +3758,7 @@ struct ggml_tensor * ggml_clamp(
     // TODO: when implement backward, fix this:
     struct ggml_tensor * result = ggml_view_tensor(ctx, a);
 
-    float params[] = { min, max };
+    const float params[] = { min, max };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_CLAMP;
@@ -3809,7 +3809,7 @@ struct ggml_tensor * ggml_im2col(
     };
 
     struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
-    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+    const int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_IM2COL;
@@ -3822,8 +3822,8 @@ struct ggml_tensor * ggml_im2col(
 struct ggml_tensor * ggml_im2col_back(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        struct ggml_tensor * b,
-        int64_t * ne,
+        struct ggml_tensor * b,
+        const int64_t * ne,
         int s0,
         int s1,
         int p0,
@@ -3832,7 +3832,7 @@ struct ggml_tensor * ggml_im2col_back(
         int d1,
         bool is_2D) {
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+    const int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_IM2COL_BACK;
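
Note: every `const int32_t params[]` / `const float params[]` change in this file follows one pattern: the array is a local staging buffer that `ggml_set_op_params` copies into the tensor's fixed-size `op_params` field, so the local can safely be const. A sketch of both halves of that contract (helper names as in ggml-impl.h):

    // Writer side, inside an op constructor: stage locally, then copy in.
    const int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
    ggml_set_op_params(result, params, sizeof(params)); // memcpy into result->op_params

    // Reader side, e.g. in a backend kernel: fetch parameters back by index.
    const int32_t s0_read = ggml_get_op_params_i32(result, 0);
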
@@ -3932,7 +3932,7 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
     };
 
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
-    int32_t params[] = { s0, p0, d0 };
+    const int32_t params[] = { s0, p0, d0 };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_CONV_TRANSPOSE_1D;
@@ -4067,7 +4067,7 @@ struct ggml_tensor * ggml_pool_1d(
     };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
-    int32_t params[] = { op, k0, s0, p0 };
+    const int32_t params[] = { op, k0, s0, p0 };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_POOL_1D;
@@ -4097,7 +4097,7 @@ struct ggml_tensor * ggml_pool_2d(
     };
     result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
-    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
+    const int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_POOL_2D;
@@ -4116,11 +4116,10 @@ struct ggml_tensor * ggml_pool_2d_back(
         int s0,
         int s1,
         float p0,
-        float p1) {
-    struct ggml_tensor * result;
-    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
+        float p1) {
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
 
-    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
+    const int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_POOL_2D_BACK;
@@ -4206,13 +4205,9 @@ struct ggml_tensor * ggml_pad_reflect_1d(
     GGML_ASSERT(ggml_is_contiguous(a));
     GGML_ASSERT(a->type == GGML_TYPE_F32);
 
-    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
-            a->ne[0] + p0 + p1,
-            a->ne[1],
-            a->ne[2],
-            a->ne[3]);
+    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, a->ne[0] + p0 + p1, a->ne[1], a->ne[2], a->ne[3]);
 
-    int32_t params[] = { p0, p1 };
+    const int32_t params[] = { p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_PAD_REFLECT_1D;
@@ -4332,7 +4327,7 @@ struct ggml_tensor * ggml_flash_attn_ext(
     int64_t ne[4] = { q->ne[0], q->ne[2], q->ne[1], q->ne[3] };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
-    float params[] = { scale, max_bias, logit_softcap };
+    const float params[] = { scale, max_bias, logit_softcap };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_FLASH_ATTN_EXT;
@@ -4531,10 +4526,13 @@ struct ggml_tensor * ggml_win_part(
     const int npy = (py + a->ne[2])/w;
     const int np = npx*npy;
 
-    const int64_t ne[4] = { a->ne[0], w, w, np, };
+    const int64_t ne[4] = { a->ne[0], w,
+        w,
+        np,
+    };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
-    int32_t params[] = { npx, npy, w };
+    const int32_t params[] = { npx, npy, w };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_WIN_PART;
@@ -4556,7 +4554,7 @@ struct ggml_tensor * ggml_win_unpart(
     const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
 
-    int32_t params[] = { w };
+    const int32_t params[] = { w };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_WIN_UNPART;
@@ -5141,11 +5139,11 @@ struct ggml_hash_set ggml_hash_set_new(size_t size) {
     return result;
 }
 
-void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
+void ggml_hash_set_reset(const struct ggml_hash_set * hash_set) {
     memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
 }
 
-void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
+void ggml_hash_set_free(const struct ggml_hash_set * hash_set) {
     GGML_FREE(hash_set->used);
     GGML_FREE(hash_set->keys);
 }
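
Note: `ggml_hash_set_reset` and `ggml_hash_set_free` can take a pointer-to-const even though they overwrite or release the table's storage: const applies to the struct's members, i.e. the pointer values themselves, not to the memory they point at. A compilable illustration with a simplified stand-in struct:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    struct hash_set_like { // simplified stand-in for struct ggml_hash_set
        size_t    size;
        uint8_t * used; // bitset storage
    };

    void reset_like(const hash_set_like * hs) {
        // hs->used = nullptr;          // error: members are const through hs
        memset(hs->used, 0, hs->size);  // fine: the pointee is not const
    }
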
@@ -5966,7 +5964,7 @@ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
     return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
 }
 
-struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
+struct ggml_cgraph ggml_graph_view(const struct ggml_cgraph * cgraph0, int i0, int i1) {
     struct ggml_cgraph cgraph = {
         /*.size    =*/ 0,
         /*.n_nodes =*/ i1 - i0,
@@ -5982,7 +5980,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
     return cgraph;
 }
 
-void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
+void ggml_graph_cpy(const struct ggml_cgraph * src, struct ggml_cgraph * dst) {
     GGML_ASSERT(dst->size >= src->n_leafs);
     GGML_ASSERT(dst->size >= src->n_nodes);
     GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
@@ -6028,7 +6026,7 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
     }
 }
 
-struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
+struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, const struct ggml_cgraph * cgraph) {
     struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads != NULL);
     ggml_graph_cpy(cgraph, result);
     return result;
@@ -6047,7 +6045,7 @@ struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
     return tensor;
 }
 
-void ggml_graph_reset(struct ggml_cgraph * cgraph) {
+void ggml_graph_reset(const struct ggml_cgraph * cgraph) {
     GGML_ASSERT(cgraph->grads != NULL);
 
     for (int i = 0; i < cgraph->n_nodes; i++) {
@@ -6086,11 +6084,11 @@ void ggml_graph_clear(struct ggml_cgraph * cgraph) {
     ggml_hash_set_reset(&cgraph->visited_hash_set);
 }
 
-int ggml_graph_size(struct ggml_cgraph * cgraph) {
+int ggml_graph_size(const struct ggml_cgraph * cgraph) {
     return cgraph->size;
 }
 
-struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
+struct ggml_tensor * ggml_graph_node(const struct ggml_cgraph * cgraph, int i) {
     if (i < 0) {
         GGML_ASSERT(cgraph->n_nodes + i >= 0);
         return cgraph->nodes[cgraph->n_nodes + i];
@@ -6100,11 +6098,11 @@ struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
     return cgraph->nodes[i];
 }
 
-struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
+struct ggml_tensor ** ggml_graph_nodes(const struct ggml_cgraph * cgraph) {
     return cgraph->nodes;
 }
 
-int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
+int ggml_graph_n_nodes(const struct ggml_cgraph * cgraph) {
     return cgraph->n_nodes;
 }
 
@@ -6357,7 +6355,7 @@ void ggml_set_output(struct ggml_tensor * tensor) {
     tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
 }
 
-void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor) {
+void ggml_set_param(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
     GGML_UNUSED(ctx); // TODO: remove this parameter
     tensor->flags |= GGML_TENSOR_FLAG_PARAM;
 }
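
Note: `ggml_graph_reset` becomes const for the same reason: it rewrites tensor data (gradients, momenta), not the node lists the pointer guards. Per the header comment it zeroes regular grads and optimizer momenta and sets the loss grad to 1, i.e. it is the "zero the gradients" step of a training loop. A hypothetical loop shape (`build_loss_graph` and `optimizer_step` are illustrative application code, not part of this patch):

    struct ggml_cgraph * gb = build_loss_graph(ctx); // forward + backward, grads enabled

    for (int step = 0; step < n_steps; ++step) {
        ggml_graph_reset(gb);                    // grads/momenta := 0, loss grad := 1
        ggml_backend_graph_compute(backend, gb); // run forward + backward
        optimizer_step(gb);                      // consume the accumulated grads
    }
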
diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index ab13669c5..1afd4b73b 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -1144,6 +1144,7 @@ struct gguf_writer {
 
     template <typename T>
     void write(const T & val) const {
+        buf.reserve(buf.size() + sizeof(val));
         for (size_t i = 0; i < sizeof(val); ++i) {
             buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
         }
@@ -1163,6 +1164,7 @@ struct gguf_writer {
             const uint64_t n = val.length();
             write(n);
         }
+        buf.reserve(buf.size() + val.length());
         for (size_t i = 0; i < val.length(); ++i) {
             buf.push_back(reinterpret_cast<const int8_t *>(val.data())[i]);
         }
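
Note: the two `reserve` calls are a micro-optimization for the byte-append loops: request capacity once so `push_back` cannot reallocate mid-copy. Because the writer appends to a buffer that already holds earlier output, the request must be relative to the current size (`buf.size() + n`); reserving just `n` would be a no-op once the buffer has grown past that. A standalone sketch of the pattern:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Append the raw bytes of val to buf, reserving up front so the
    // push_back loop cannot trigger a reallocation mid-append.
    template <typename T>
    void append_bytes(std::vector<int8_t> & buf, const T & val) {
        buf.reserve(buf.size() + sizeof(val));
        const int8_t * p = reinterpret_cast<const int8_t *>(&val);
        for (size_t i = 0; i < sizeof(val); ++i) {
            buf.push_back(p[i]);
        }
    }
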