ggml: reserve in gguf_writer and add const pointers as params

commit 3100a05ba1
parent a1649cc13f
Author: Herman Semenov
Date: 2025-01-18 21:51:44 +03:00

7 changed files with 80 additions and 79 deletions
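Two kinds of changes: pointer parameters that are only read become const-qualified across the allocator, graph, hash-set, and tensor-op code (along with small cleanups such as const op-param arrays and collapsed declarations), and gguf_writer now reserves vector capacity before appending bytes. To illustrate what the const qualifiers buy (a sketch with a stand-in type, not code from this commit):

// Stand-in for struct ggml_cgraph, so the sketch is self-contained.
struct cgraph_info { int size; int n_nodes; };

// A const parameter accepts both const and non-const graphs and makes
// accidental mutation inside the function a compile error.
int graph_size(const struct cgraph_info * cgraph) {
    // cgraph->n_nodes = 0;  // error: member of read-only object
    return cgraph->size;
}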

ggml/include/ggml-alloc.h

@@ -46,17 +46,17 @@ GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, st
 typedef struct ggml_gallocr * ggml_gallocr_t;
 GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
-GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
+GGML_API ggml_gallocr_t ggml_gallocr_new_n(const ggml_backend_buffer_type_t * bufts, int n_bufs);
 GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
 // pre-allocate buffers from a measure graph - does not allocate or modify the graph
 // call with a worst-case graph to avoid buffer reallocations
 // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
 // returns false if the buffer allocation failed
-GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
+GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, const struct ggml_cgraph * graph);
 GGML_API bool ggml_gallocr_reserve_n(
     ggml_gallocr_t galloc,
-    struct ggml_cgraph * graph,
+    const struct ggml_cgraph * graph,
     const int * node_buffer_ids,
     const int * leaf_buffer_ids);
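A minimal usage sketch of the reserve API above (hypothetical code assuming the CPU backend; not part of this commit): reserve once with a worst-case graph, then allocate per-iteration graphs from the same allocator.

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

void alloc_example(struct ggml_cgraph * worst_case, struct ggml_cgraph * graph) {
    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
    ggml_gallocr_reserve(galloc, worst_case);  // size buffers for the worst case
    ggml_gallocr_alloc_graph(galloc, graph);   // no reallocation if the graph fits
    // ... compute the graph ...
    ggml_gallocr_free(galloc);
}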

ggml/include/ggml.h

@@ -698,7 +698,7 @@ extern "C" {
     GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
-    GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
+    GGML_API bool ggml_get_no_alloc(const struct ggml_context * ctx);
     GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
     GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
@@ -745,7 +745,7 @@ extern "C" {
     // Context tensor enumeration and lookup
     GGML_API struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx);
     GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
+    GGML_API struct ggml_tensor * ggml_get_tensor(const struct ggml_context * ctx, const char * name);
     // Converts a flat index into coordinates
     GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
@@ -763,7 +763,7 @@ extern "C" {
     // Tensor flags
     GGML_API void ggml_set_input(struct ggml_tensor * tensor);
     GGML_API void ggml_set_output(struct ggml_tensor * tensor);
-    GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API void ggml_set_param(const struct ggml_context * ctx, struct ggml_tensor * tensor);
     GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
     //
@@ -927,13 +927,13 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_repeat(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            struct ggml_tensor * b);
+            const struct ggml_tensor * b);
     // sums repetitions in a into shape of b
     GGML_API struct ggml_tensor * ggml_repeat_back(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            struct ggml_tensor * b);
+            const struct ggml_tensor * b);
     // concat a and b along dim
     // used in stable-diffusion
@@ -1243,7 +1243,7 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_reshape(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            struct ggml_tensor * b);
+            const struct ggml_tensor * b);
     // return view(a)
     // TODO: when we start computing gradient, make a copy instead of view
@@ -1335,7 +1335,7 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a, // gradients of ggml_get_rows result
            struct ggml_tensor * b, // row indices
-            struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
+            const struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
     GGML_API struct ggml_tensor * ggml_diag(
             struct ggml_context * ctx,
@@ -1563,7 +1563,7 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a, // convolution kernel
             struct ggml_tensor * b, // gradient of im2col output
-            int64_t * ne, // shape of im2col input
+            const int64_t * ne, // shape of im2col input
             int s0, // stride dimension 0
             int s1, // stride dimension 1
             int p0, // padding dimension 0
@@ -2062,15 +2062,16 @@ extern "C" {
     // graph allocation in a context
     GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
     GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
-    GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-    GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
-    GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
+    GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, const struct ggml_cgraph * cgraph);
+    GGML_API void ggml_graph_cpy (const struct ggml_cgraph * src, struct ggml_cgraph * dst);
+    GGML_API void ggml_graph_reset (
+        const struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
     GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
-    GGML_API int ggml_graph_size (struct ggml_cgraph * cgraph);
-    GGML_API struct ggml_tensor * ggml_graph_node (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
-    GGML_API struct ggml_tensor ** ggml_graph_nodes (struct ggml_cgraph * cgraph);
-    GGML_API int ggml_graph_n_nodes(struct ggml_cgraph * cgraph);
+    GGML_API int ggml_graph_size (const struct ggml_cgraph * cgraph);
+    GGML_API struct ggml_tensor * ggml_graph_node (const struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
+    GGML_API struct ggml_tensor ** ggml_graph_nodes (const struct ggml_cgraph * cgraph);
+    GGML_API int ggml_graph_n_nodes(const struct ggml_cgraph * cgraph);
     GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
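With the const-qualified accessors, read-only graph inspection can take the graph by const pointer. A small sketch (assumed usage, not from the diff):

#include <cstdio>
#include "ggml.h"

void print_nodes(const struct ggml_cgraph * cgraph) {
    for (int i = 0; i < ggml_graph_n_nodes(cgraph); ++i) {
        const struct ggml_tensor * node = ggml_graph_node(cgraph, i);
        std::printf("%3d: %s (%s)\n", i, node->name, ggml_op_name(node->op));
    }
}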

ggml/src/ggml-alloc.c

@@ -377,7 +377,7 @@ struct ggml_gallocr {
     int n_leafs;
 };
-ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs) {
+ggml_gallocr_t ggml_gallocr_new_n(const ggml_backend_buffer_type_t * bufts, int n_bufs) {
     ggml_gallocr_t galloc = (ggml_gallocr_t)calloc(1, sizeof(struct ggml_gallocr));
     GGML_ASSERT(galloc != NULL);
@@ -563,7 +563,7 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {
     return node_buffer_ids ? node_buffer_ids[i] : 0;
 }
-static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, const struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
     // clear hash tables
     ggml_hash_set_reset(&galloc->hash_set);
     memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
@@ -670,7 +670,7 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
     }
 }
-bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, const struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
     size_t min_hash_size = graph->n_nodes + graph->n_leafs;
     // add 25% margin to avoid hash collisions
     min_hash_size += min_hash_size / 4;
@@ -780,11 +780,11 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
     return true;
 }
-bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
+bool ggml_gallocr_reserve(ggml_gallocr_t galloc, const struct ggml_cgraph *graph) {
     return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
 }
-static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, struct tensor_alloc * tensor_alloc) {
+static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, const struct tensor_alloc * tensor_alloc) {
     int buffer_id = tensor_alloc->buffer_id;
     assert(tensor->data || tensor->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
@@ -813,7 +813,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
     }
 }
-static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
+static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, const struct tensor_alloc * talloc) {
     size_t node_size = 0;
     if (!node->data && !node->view_src) {
         GGML_ASSERT(talloc->buffer_id >= 0); // prevent segfault when misusing the API
@@ -822,7 +822,7 @@ static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_t
     return talloc->size_max >= node_size;
 }
-static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph * graph) {
+static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, const struct ggml_cgraph * graph) {
     if (galloc->n_nodes != graph->n_nodes) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: graph has different number of nodes\n", __func__);
@@ -933,8 +933,8 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
 // utils
-static bool alloc_tensor_range(struct ggml_context * ctx,
-        struct ggml_tensor * first, struct ggml_tensor * last,
+static bool alloc_tensor_range(const struct ggml_context * ctx,
+        struct ggml_tensor * first, const struct ggml_tensor * last,
         ggml_backend_buffer_type_t buft, size_t size,
         ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
     ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
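As a worked check of the hash sizing in ggml_gallocr_reserve_n above: a graph with 800 nodes and 200 leafs gives min_hash_size = 1000, and the 25% margin raises the request to 1250 entries.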

ggml/src/ggml-backend-reg.cpp

@@ -124,7 +124,7 @@ static void * dl_get_sym(dl_handle * handle, const char * name) {
 using dl_handle = void;
 struct dl_handle_deleter {
-    void operator()(void * handle) {
+    void operator()(void * handle) const {
         dlclose(handle);
     }
 };
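Marking the call operator const is the idiomatic choice for a stateless deleter: it can then be invoked through a const deleter object as well. A self-contained sketch of the pattern (hypothetical example, not from this file; dl_handle_ptr is an assumed alias):

#include <dlfcn.h>
#include <memory>

using dl_handle = void;

struct dl_handle_deleter {
    void operator()(void * handle) const {  // stateless, so const-callable
        dlclose(handle);
    }
};

// Typical pairing with a smart pointer:
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;

int main() {
    dl_handle_ptr lib { dlopen("libm.so.6", RTLD_NOW) };
    return lib ? 0 : 1;  // dlclose runs automatically when lib is destroyed
}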

ggml/src/ggml-impl.h

@@ -188,13 +188,13 @@ struct ggml_hash_set {
 };
 struct ggml_hash_set ggml_hash_set_new(size_t size);
-void ggml_hash_set_free(struct ggml_hash_set * hash_set);
+void ggml_hash_set_free(const struct ggml_hash_set * hash_set);
 // returns the minimum size for a hash set that can hold min_sz elements
 size_t ggml_hash_size(size_t min_sz);
 // remove all elements from the hash set
-void ggml_hash_set_reset(struct ggml_hash_set * hash_set);
+void ggml_hash_set_reset(const struct ggml_hash_set * hash_set);
 // returns true if key is in the hash set
 static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
@@ -302,7 +302,7 @@ struct ggml_cgraph {
 // returns a slice of cgraph with nodes [i0, i1)
 // the slice does not have leafs or gradients
 // if you need the gradients, get them from the original graph
-struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
+struct ggml_cgraph ggml_graph_view(const struct ggml_cgraph * cgraph, int i0, int i1);
 // Memory allocation
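Since ggml_graph_view returns the slice by value, callers can split a graph into node ranges without copying nodes. A fragment sketch (assumed usage; cgraph and n_split are hypothetical locals, and both views share, and must not outlive, the original graph):

struct ggml_cgraph lo = ggml_graph_view(cgraph, 0, n_split);
struct ggml_cgraph hi = ggml_graph_view(cgraph, n_split, ggml_graph_n_nodes(cgraph));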

ggml/src/ggml.c

@@ -1474,7 +1474,7 @@ size_t ggml_used_mem(const struct ggml_context * ctx) {
     return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
 }
-bool ggml_get_no_alloc(struct ggml_context * ctx) {
+bool ggml_get_no_alloc(const struct ggml_context * ctx) {
     return ctx->no_alloc;
 }
@@ -1785,7 +1785,7 @@ struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struc
     return NULL;
 }
-struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
+struct ggml_tensor * ggml_get_tensor(const struct ggml_context * ctx, const char * name) {
     struct ggml_object * obj = ctx->objects_begin;
     char * const mem_buffer = ctx->mem_buffer;
@@ -1948,7 +1948,7 @@ static struct ggml_tensor * ggml_acc_impl(
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    const int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_ACC;
@@ -2289,7 +2289,7 @@ struct ggml_tensor * ggml_count_equal(
 struct ggml_tensor * ggml_repeat(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        struct ggml_tensor * b) {
+        const struct ggml_tensor * b) {
     GGML_ASSERT(ggml_can_repeat(a, b));
     struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
@@ -2305,7 +2305,7 @@ struct ggml_tensor * ggml_repeat(
 struct ggml_tensor * ggml_repeat_back(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        struct ggml_tensor * b) {
+        const struct ggml_tensor * b) {
     GGML_ASSERT(ggml_can_repeat(b, a));
     struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
@@ -2832,7 +2832,7 @@ static struct ggml_tensor * ggml_set_impl(
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
     GGML_ASSERT(offset < (size_t)(1 << 30));
-    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    const int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_SET;
@@ -3010,7 +3010,7 @@ struct ggml_tensor * ggml_cont_4d(
 struct ggml_tensor * ggml_reshape(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        struct ggml_tensor * b) {
+        const struct ggml_tensor * b) {
     GGML_ASSERT(ggml_is_contiguous(a));
     // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
     GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
@@ -3243,7 +3243,7 @@ struct ggml_tensor * ggml_permute(
     result->op = GGML_OP_PERMUTE;
     result->src[0] = a;
-    int32_t params[] = { axis0, axis1, axis2, axis3 };
+    const int32_t params[] = { axis0, axis1, axis2, axis3 };
     ggml_set_op_params(result, params, sizeof(params));
     return result;
@@ -3299,7 +3299,7 @@ struct ggml_tensor * ggml_get_rows_back(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
-        struct ggml_tensor * c) {
+        const struct ggml_tensor * c) {
     GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
     GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
@@ -3339,7 +3339,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
     bool inplace) {
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-    int32_t params[] = { n_past };
+    const int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_DIAG_MASK_INF;
@@ -3371,7 +3371,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
     bool inplace) {
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-    int32_t params[] = { n_past };
+    const int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_DIAG_MASK_ZERO;
@@ -3419,7 +3419,7 @@ static struct ggml_tensor * ggml_soft_max_impl(
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-    float params[] = { scale, max_bias };
+    const float params[] = { scale, max_bias };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_SOFT_MAX;
@@ -3754,7 +3754,7 @@ struct ggml_tensor * ggml_clamp(
     // TODO: when implement backward, fix this:
     struct ggml_tensor * result = ggml_view_tensor(ctx, a);
-    float params[] = { min, max };
+    const float params[] = { min, max };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_CLAMP;
@@ -3805,7 +3805,7 @@ struct ggml_tensor * ggml_im2col(
     };
     struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
-    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+    const int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_IM2COL;
@@ -3819,7 +3819,7 @@ struct ggml_tensor * ggml_im2col_back(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
-        int64_t * ne,
+        const int64_t * ne,
         int s0,
         int s1,
         int p0,
@@ -3828,7 +3828,7 @@ struct ggml_tensor * ggml_im2col_back(
         int d1,
         bool is_2D) {
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+    const int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_IM2COL_BACK;
@@ -3928,7 +3928,7 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
     };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-    int32_t params[] = { s0, p0, d0 };
+    const int32_t params[] = { s0, p0, d0 };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_CONV_TRANSPOSE_1D;
@@ -4063,7 +4063,7 @@ struct ggml_tensor * ggml_pool_1d(
     };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-    int32_t params[] = { op, k0, s0, p0 };
+    const int32_t params[] = { op, k0, s0, p0 };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_POOL_1D;
@@ -4093,7 +4093,7 @@ struct ggml_tensor * ggml_pool_2d(
     };
     result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
+    const int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_POOL_2D;
@@ -4113,10 +4113,9 @@ struct ggml_tensor * ggml_pool_2d_back(
         int s1,
         float p0,
         float p1) {
-    struct ggml_tensor * result;
-    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
-    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
+    const int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_POOL_2D_BACK;
@@ -4202,13 +4201,9 @@ struct ggml_tensor * ggml_pad_reflect_1d(
     GGML_ASSERT(ggml_is_contiguous(a));
     GGML_ASSERT(a->type == GGML_TYPE_F32);
-    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
-        a->ne[0] + p0 + p1,
-        a->ne[1],
-        a->ne[2],
-        a->ne[3]);
-    int32_t params[] = { p0, p1 };
+    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, a->ne[0] + p0 + p1, a->ne[1], a->ne[2], a->ne[3]);
+    const int32_t params[] = { p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_PAD_REFLECT_1D;
@@ -4328,7 +4323,7 @@ struct ggml_tensor * ggml_flash_attn_ext(
     int64_t ne[4] = { q->ne[0], q->ne[2], q->ne[1], q->ne[3] };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-    float params[] = { scale, max_bias, logit_softcap };
+    const float params[] = { scale, max_bias, logit_softcap };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_FLASH_ATTN_EXT;
@@ -4527,10 +4522,13 @@ struct ggml_tensor * ggml_win_part(
     const int npy = (py + a->ne[2])/w;
     const int np = npx*npy;
-    const int64_t ne[4] = { a->ne[0], w, w, np, };
+    const int64_t ne[4] = { a->ne[0], w,
+        w,
+        np,
+    };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-    int32_t params[] = { npx, npy, w };
+    const int32_t params[] = { npx, npy, w };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_WIN_PART;
@@ -4552,7 +4550,7 @@ struct ggml_tensor * ggml_win_unpart(
     const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
-    int32_t params[] = { w };
+    const int32_t params[] = { w };
     ggml_set_op_params(result, params, sizeof(params));
     result->op = GGML_OP_WIN_UNPART;
@@ -5137,11 +5135,11 @@ struct ggml_hash_set ggml_hash_set_new(size_t size) {
     return result;
 }
-void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
+void ggml_hash_set_reset(const struct ggml_hash_set * hash_set) {
     memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
 }
-void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
+void ggml_hash_set_free(const struct ggml_hash_set * hash_set) {
     GGML_FREE(hash_set->used);
     GGML_FREE(hash_set->keys);
 }
@@ -5956,7 +5954,7 @@ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
     return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
 }
-struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
+struct ggml_cgraph ggml_graph_view(const struct ggml_cgraph * cgraph0, int i0, int i1) {
     struct ggml_cgraph cgraph = {
         /*.size =*/ 0,
         /*.n_nodes =*/ i1 - i0,
@@ -5972,7 +5970,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
     return cgraph;
 }
-void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
+void ggml_graph_cpy(const struct ggml_cgraph * src, struct ggml_cgraph * dst) {
     GGML_ASSERT(dst->size >= src->n_leafs);
     GGML_ASSERT(dst->size >= src->n_nodes);
     GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
@@ -6018,7 +6016,7 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
     }
 }
-struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
+struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, const struct ggml_cgraph * cgraph) {
     struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads != NULL);
     ggml_graph_cpy(cgraph, result);
     return result;
@@ -6037,7 +6035,7 @@ struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
     return tensor;
 }
-void ggml_graph_reset(struct ggml_cgraph * cgraph) {
+void ggml_graph_reset(const struct ggml_cgraph * cgraph) {
     GGML_ASSERT(cgraph->grads != NULL);
     for (int i = 0; i < cgraph->n_nodes; i++) {
@@ -6076,11 +6074,11 @@ void ggml_graph_clear(struct ggml_cgraph * cgraph) {
     ggml_hash_set_reset(&cgraph->visited_hash_set);
 }
-int ggml_graph_size(struct ggml_cgraph * cgraph) {
+int ggml_graph_size(const struct ggml_cgraph * cgraph) {
     return cgraph->size;
 }
-struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
+struct ggml_tensor * ggml_graph_node(const struct ggml_cgraph * cgraph, int i) {
     if (i < 0) {
         GGML_ASSERT(cgraph->n_nodes + i >= 0);
         return cgraph->nodes[cgraph->n_nodes + i];
@@ -6090,11 +6088,11 @@ struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
     return cgraph->nodes[i];
 }
-struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
+struct ggml_tensor ** ggml_graph_nodes(const struct ggml_cgraph * cgraph) {
     return cgraph->nodes;
 }
-int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
+int ggml_graph_n_nodes(const struct ggml_cgraph * cgraph) {
     return cgraph->n_nodes;
 }
@@ -6347,7 +6345,7 @@ void ggml_set_output(struct ggml_tensor * tensor) {
     tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
 }
-void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor) {
+void ggml_set_param(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
     GGML_UNUSED(ctx); // TODO: remove this parameter
     tensor->flags |= GGML_TENSOR_FLAG_PARAM;
 }

ggml/src/gguf.cpp

@@ -1144,6 +1144,7 @@ struct gguf_writer {
     template <typename T>
     void write(const T & val) const {
+        buf.reserve(sizeof(val));
         for (size_t i = 0; i < sizeof(val); ++i) {
             buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
         }
@@ -1163,6 +1164,7 @@ struct gguf_writer {
             const uint64_t n = val.length();
             write(n);
         }
+        buf.reserve(val.length());
         for (size_t i = 0; i < val.length(); ++i) {
             buf.push_back(reinterpret_cast<const int8_t *>(val.data())[i]);
         }
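The reserve calls follow the reserve-then-append idiom: grow the byte buffer's capacity once, then push bytes without intermediate reallocations. Note that std::vector::reserve takes a total capacity rather than an increment, so guaranteeing room for n more bytes on a non-empty buffer means requesting buf.size() + n. A standalone sketch of the idiom under that convention (hypothetical code, not the repository's gguf_writer):

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

struct byte_writer {                      // hypothetical stand-in for gguf_writer
    std::vector<int8_t> & buf;

    template <typename T>
    void write(const T & val) const {
        buf.reserve(buf.size() + sizeof(val));  // one growth, then cheap appends
        for (size_t i = 0; i < sizeof(val); ++i) {
            buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
        }
    }

    void write(const std::string & val) const {
        write(uint64_t(val.length()));          // length prefix, then the bytes
        buf.reserve(buf.size() + val.length());
        for (size_t i = 0; i < val.length(); ++i) {
            buf.push_back(reinterpret_cast<const int8_t *>(val.data())[i]);
        }
    }
};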