ggml : reduce hash table reset cost

slaren 2024-03-18 15:31:21 +01:00
parent 01245f5b16
commit ac6ce60ec4
7 changed files with 369 additions and 265 deletions

View file

@ -325,9 +325,9 @@ ifdef LLAMA_DEBUG
endif endif
else else
MK_CPPFLAGS += -DNDEBUG MK_CPPFLAGS += -DNDEBUG
MK_CFLAGS += -O3 MK_CFLAGS += -O3 -g
MK_CXXFLAGS += -O3 MK_CXXFLAGS += -O3 -g
MK_NVCCFLAGS += -O3 MK_NVCCFLAGS += -O3 -g
endif endif
ifdef LLAMA_SANITIZE_THREAD ifdef LLAMA_SANITIZE_THREAD

View file

@ -254,18 +254,8 @@
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1)) #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
#define GGML_ASSERT(x) \
do { \
if (!(x)) { \
fflush(stdout); \
fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
ggml_print_backtrace(); \
abort(); \
} \
} while (0)
#ifndef NDEBUG #ifndef NDEBUG
#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached") #define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
#elif defined(__GNUC__) #elif defined(__GNUC__)
#define GGML_UNREACHABLE() __builtin_unreachable() #define GGML_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
@ -274,6 +264,16 @@
#define GGML_UNREACHABLE() ((void) 0) #define GGML_UNREACHABLE() ((void) 0)
#endif #endif
#ifdef __cplusplus
#define GGML_NORETURN [[noreturn]]
#elif defined(_MSC_VER)
#define GGML_NORETURN __declspec(noreturn)
#else
#define GGML_NORETURN _Noreturn
#endif
#define GGML_ASSERT(x) if (!(x)) ggml_abort(__FILE__, __LINE__, #x)
// used to copy the number of elements and stride in bytes of tensors into local variables. // used to copy the number of elements and stride in bytes of tensors into local variables.
// main purpose is to reduce code duplication and improve readability. // main purpose is to reduce code duplication and improve readability.
// //
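The new GGML_ASSERT above keeps only the check in the macro and moves the heavy error path into a single noreturn function, so each call site expands to one branch instead of a multi-line block. A minimal, self-contained sketch of the same pattern (hypothetical MY_ASSERT/my_abort names, not the ggml symbols):

#include <stdio.h>
#include <stdlib.h>

#ifdef __cplusplus
#define MY_NORETURN [[noreturn]]
#else
#define MY_NORETURN _Noreturn
#endif

// the heavy error path lives in one function, shared by every assert site
MY_NORETURN static void my_abort(const char * file, int line, const char * expr) {
    fflush(stdout);
    fprintf(stderr, "ASSERT: %s:%d: %s\n", file, line, expr);
    abort();
}

#define MY_ASSERT(x) if (!(x)) my_abort(__FILE__, __LINE__, #x)

int main(void) {
    int n = 4;
    MY_ASSERT(n > 0);  // passes
    MY_ASSERT(n < 0);  // prints "ASSERT: <file>:<line>: n < 0" and aborts
    return 0;
}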
@ -322,6 +322,8 @@
extern "C" { extern "C" {
#endif #endif
GGML_API GGML_NORETURN void ggml_abort(const char * file, int line, const char * expr);
enum ggml_status { enum ggml_status {
GGML_STATUS_ALLOC_FAILED = -2, GGML_STATUS_ALLOC_FAILED = -2,
GGML_STATUS_FAILED = -1, GGML_STATUS_FAILED = -1,
@ -636,8 +638,11 @@ extern "C" {
GGML_CGRAPH_EVAL_ORDER_COUNT GGML_CGRAPH_EVAL_ORDER_COUNT
}; };
typedef uint32_t ggml_bitset_t;
struct ggml_hash_set { struct ggml_hash_set {
size_t size; size_t size;
ggml_bitset_t * used;
struct ggml_tensor ** keys; struct ggml_tensor ** keys;
}; };
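The new used bitset is what makes cheap resets possible: clearing the set now touches one bit per slot instead of one key pointer per slot. A rough cost sketch, assuming a 64-bit build and a set of 1031 slots (one of the prime sizes listed further down in the diff):

#include <stdint.h>
#include <stdio.h>

int main(void) {
    size_t size = 1031;                                        // slots in the hash set
    size_t old_bytes = size * sizeof(void *);                  // keys cleared before: 8248
    size_t new_bytes = ((size + 31) / 32) * sizeof(uint32_t);  // bitset cleared now:  132
    printf("old reset: %zu bytes, new reset: %zu bytes\n", old_bytes, new_bytes);
    return 0;
}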
@ -651,7 +656,7 @@ extern "C" {
struct ggml_tensor ** grads; struct ggml_tensor ** grads;
struct ggml_tensor ** leafs; struct ggml_tensor ** leafs;
struct ggml_hash_set visited_hash_table; struct ggml_hash_set visited_hash_set;
enum ggml_cgraph_eval_order order; enum ggml_cgraph_eval_order order;
}; };
@ -698,8 +703,6 @@ extern "C" {
GGML_API int64_t ggml_cycles(void); GGML_API int64_t ggml_cycles(void);
GGML_API int64_t ggml_cycles_per_ms(void); GGML_API int64_t ggml_cycles_per_ms(void);
GGML_API void ggml_print_backtrace(void);
// accepts a UTF-8 path, even on Windows // accepts a UTF-8 path, even on Windows
GGML_API FILE * ggml_fopen(const char * fname, const char * mode); GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
@ -2006,7 +2009,7 @@ extern "C" {
// ggml_graph_plan() has to be called before ggml_graph_compute() // ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data // when plan.work_size > 0, caller must allocate memory for plan.work_data
GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/); GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); GGML_API enum ggml_status ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
// same as ggml_graph_compute() but the work data is allocated as a part of the context // same as ggml_graph_compute() but the work data is allocated as a part of the context
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads); GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);

View file

@ -443,7 +443,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
} }
} }
free(galloc->hash_set.keys); ggml_hash_set_free(&galloc->hash_set);
free(galloc->hash_values); free(galloc->hash_values);
free(galloc->bufts); free(galloc->bufts);
free(galloc->buffers); free(galloc->buffers);
@ -456,7 +456,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
typedef struct ggml_gallocr * ggml_gallocr_t; typedef struct ggml_gallocr * ggml_gallocr_t;
static struct hash_node * ggml_gallocr_hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) { static struct hash_node * ggml_gallocr_hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) {
size_t i = ggml_hash_find_or_insert(galloc->hash_set, t); size_t i = ggml_hash_find_or_insert(&galloc->hash_set, t);
return &galloc->hash_values[i]; return &galloc->hash_values[i];
} }
@ -565,8 +565,8 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {
static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) { static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
// clear hash tables // clear hash tables
memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *)); ggml_hash_set_reset(&galloc->hash_set);
memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node)); memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
// allocate leafs // allocate leafs
// these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes // these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes
@ -671,21 +671,19 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
} }
bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) { bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
size_t hash_size = graph->visited_hash_table.size; size_t min_hash_size = graph->n_nodes + graph->n_leafs;
// add 25% margin to avoid hash collisions
min_hash_size += min_hash_size / 4;
// initialize hash table // initialize hash table
if (galloc->hash_set.size < hash_size) { if (galloc->hash_set.size < min_hash_size) {
free(galloc->hash_set.keys); ggml_hash_set_free(&galloc->hash_set);
free(galloc->hash_values); galloc->hash_set = ggml_hash_set_new(min_hash_size);
galloc->hash_set.size = hash_size;
galloc->hash_set.keys = calloc(hash_size, sizeof(struct ggml_tensor *));
galloc->hash_values = calloc(hash_size, sizeof(struct hash_node));
GGML_ASSERT(galloc->hash_set.keys != NULL); GGML_ASSERT(galloc->hash_set.keys != NULL);
free(galloc->hash_values);
galloc->hash_values = malloc(sizeof(struct hash_node) * galloc->hash_set.size);
GGML_ASSERT(galloc->hash_values != NULL); GGML_ASSERT(galloc->hash_values != NULL);
} else {
// reset hash table
memset(galloc->hash_set.keys, 0, sizeof(struct ggml_tensor *) * galloc->hash_set.size);
memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
} }
// reset allocators // reset allocators
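A quick sizing example for the new reservation path above: a graph with 1600 nodes and 400 leafs needs at least 2000 slots; the 25% margin raises that to 2500, which ggml_hash_set_new then rounds up to the next prime size.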
@ -817,8 +815,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
} }
static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) { static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
ggml_backend_buffer_type_t buft = talloc->buffer_id != -1 ? galloc->bufts[talloc->buffer_id] : NULL; size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(buft, node);
return talloc->size_max >= node_size; return talloc->size_max >= node_size;
} }

View file

@ -1055,11 +1055,10 @@ struct ggml_backend_sched {
ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS]; ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS];
ggml_gallocr_t galloc; ggml_gallocr_t galloc;
// hash keys of the nodes in the graph // hash map of the nodes in the graph
struct ggml_hash_set hash_set; struct ggml_hash_set hash_set;
// hash values int * hv_tensor_backend_ids; // [hash_set.size]
int * tensor_backend_id; struct ggml_tensor ** hv_tensor_copies; // [hash_set.size][n_backends][n_copies]
struct ggml_tensor * (* tensor_copies)[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
int * node_backend_ids; // [graph_size] int * node_backend_ids; // [graph_size]
int * leaf_backend_ids; // [graph_size] int * leaf_backend_ids; // [graph_size]
@ -1068,7 +1067,7 @@ struct ggml_backend_sched {
int * prev_leaf_backend_ids; // [graph_size] int * prev_leaf_backend_ids; // [graph_size]
// copy of the graph with modified inputs // copy of the graph with modified inputs
struct ggml_cgraph * graph; struct ggml_cgraph graph;
// graph splits // graph splits
struct ggml_backend_sched_split * splits; struct ggml_backend_sched_split * splits;
@ -1087,19 +1086,16 @@ struct ggml_backend_sched {
ggml_backend_sched_eval_callback callback_eval; ggml_backend_sched_eval_callback callback_eval;
void * callback_eval_user_data; void * callback_eval_user_data;
bool debug; char * context_buffer;
size_t context_buffer_size;
// align context_buffer to GGML_MEM_ALIGN bool debug;
#ifdef _MSC_VER
__declspec(align(GGML_MEM_ALIGN))
#else
__attribute__((aligned(GGML_MEM_ALIGN)))
#endif
char context_buffer[GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
}; };
#define hash_id(tensor) ggml_hash_find_or_insert(sched->hash_set, tensor) #define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
#define tensor_backend_id(tensor) sched->tensor_backend_id[hash_id(tensor)] #define tensor_backend_id(tensor) sched->hv_tensor_backend_ids[hash_id(tensor)]
#define tensor_id_copy(id, backend_id, copy_id) sched->hv_tensor_copies[(id) * sched->n_backends * sched->n_copies + (backend_id) * sched->n_copies + (copy_id)]
#define tensor_copy(tensor, backend_id, copy_id) tensor_id_copy(hash_id(tensor), backend_id, copy_id)
// returns the priority of the backend, lower id is higher priority // returns the priority of the backend, lower id is higher priority
static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) { static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) {
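The tensor_id_copy macro above replaces the fixed [GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES] array with a single allocation indexed as [hash_id][backend][copy]. A standalone sketch of that flattened indexing (hypothetical sizes, plain ints standing in for tensor pointers):

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    const int hash_size = 7, n_backends = 3, n_copies = 2;
    int * copies = calloc(hash_size * n_backends * n_copies, sizeof(int));

    int id = 5, backend_id = 2, copy_id = 1;
    // same formula as the macro: (id)*n_backends*n_copies + (backend_id)*n_copies + (copy_id)
    copies[id * n_backends * n_copies + backend_id * n_copies + copy_id] = 42;

    printf("%d\n", copies[5 * 6 + 2 * 2 + 1]);  // prints 42 (flat index 35)
    free(copies);
    return 0;
}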
@ -1169,7 +1165,6 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
return cur_backend_id; return cur_backend_id;
} }
// assign nodes that use weights to the backend of the weights
// operations with weights are preferably run on the same backend as the weights // operations with weights are preferably run on the same backend as the weights
for (int i = 0; i < GGML_MAX_SRC; i++) { for (int i = 0; i < GGML_MAX_SRC; i++) {
const struct ggml_tensor * src = tensor->src[i]; const struct ggml_tensor * src = tensor->src[i];
@ -1275,7 +1270,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
sched->is_reset = false; sched->is_reset = false;
struct ggml_init_params params = { struct ggml_init_params params = {
/* .mem_size = */ sizeof(sched->context_buffer), /* .mem_size = */ sched->context_buffer_size,
/* .mem_buffer = */ sched->context_buffer, /* .mem_buffer = */ sched->context_buffer,
/* .no_alloc = */ true /* .no_alloc = */ true
}; };
@ -1292,22 +1287,25 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
for (int i = 0; i < graph->n_leafs; i++) { for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i]; struct ggml_tensor * leaf = graph->leafs[i];
int * leaf_backend_id = &tensor_backend_id(leaf); int * leaf_backend_id = &tensor_backend_id(leaf);
if (*leaf_backend_id != -1) {
// do not overwrite user assignments // do not overwrite user assignments
continue; if (*leaf_backend_id == -1) {
}
*leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf); *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf);
} }
}
for (int i = 0; i < graph->n_nodes; i++) { for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node = graph->nodes[i];
int * node_backend_id = &tensor_backend_id(node); int * node_backend_id = &tensor_backend_id(node);
if (*node_backend_id != -1) {
// do not overwrite user assignments // do not overwrite user assignments
if (*node_backend_id == -1) {
*node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
#if 0
// src
if (node->op == GGML_OP_NONE) {
continue; continue;
} }
*node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
// src
for (int j = 0; j < GGML_MAX_SRC; j++) { for (int j = 0; j < GGML_MAX_SRC; j++) {
struct ggml_tensor * src = node->src[j]; struct ggml_tensor * src = node->src[j];
if (src == NULL) { if (src == NULL) {
@ -1318,6 +1316,8 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
*src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src); *src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src);
} }
} }
#endif
}
} }
// pass 2: expand current backend assignments // pass 2: expand current backend assignments
@ -1488,12 +1488,13 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
} }
} }
// pass 4: split graph, find tensors that need to be copied // pass 5: split graph, find tensors that need to be copied
{ {
int i_split = 0; int i_split = 0;
struct ggml_backend_sched_split * split = &sched->splits[0]; struct ggml_backend_sched_split * split = &sched->splits[0];
// find the backend of the first split, skipping view ops // find the backend of the first split, skipping view ops
for (int i = 0; i < graph->n_nodes; i++) { int i = 0;
for (; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node = graph->nodes[i];
if (!ggml_is_view_op(node->op)) { if (!ggml_is_view_op(node->op)) {
split->backend_id = tensor_backend_id(node); split->backend_id = tensor_backend_id(node);
@ -1502,9 +1503,8 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
} }
split->i_start = 0; split->i_start = 0;
split->n_inputs = 0; split->n_inputs = 0;
memset(split->inputs, 0, sizeof(split->inputs)); //HACK
int cur_backend_id = split->backend_id; int cur_backend_id = split->backend_id;
for (int i = 0; i < graph->n_nodes; i++) { for (; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node = graph->nodes[i];
if (ggml_is_view_op(node->op)) { if (ggml_is_view_op(node->op)) {
@ -1513,7 +1513,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
const int node_backend_id = tensor_backend_id(node); const int node_backend_id = tensor_backend_id(node);
GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now assert(node_backend_id != -1); // all nodes should be assigned by now
// check if we should start a new split based on the sources of the current node // check if we should start a new split based on the sources of the current node
bool need_new_split = false; bool need_new_split = false;
@ -1527,7 +1527,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
// by starting a new split, the memory of the previously offloaded weights can be reused // by starting a new split, the memory of the previously offloaded weights can be reused
if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) { if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
int src_backend_id = tensor_backend_id(src); int src_backend_id = tensor_backend_id(src);
if (src_backend_id != -1 && src_backend_id != cur_backend_id) { if (src_backend_id != cur_backend_id) {
need_new_split = true; need_new_split = true;
break; break;
} }
@ -1536,9 +1536,9 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
// FIXME: count the number of inputs instead of only checking when full // FIXME: count the number of inputs instead of only checking when full
if (split->n_inputs == GGML_SCHED_MAX_SPLIT_INPUTS) { if (split->n_inputs == GGML_SCHED_MAX_SPLIT_INPUTS) {
const size_t id = hash_id(src); const size_t id = hash_id(src);
int src_backend_id = sched->tensor_backend_id[id]; int src_backend_id = sched->hv_tensor_backend_ids[id];
bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id); bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id);
if (src_backend_id != cur_backend_id && sched->tensor_copies[hash_id(src)][cur_backend_id][0] == NULL && !supported) { if (src_backend_id != cur_backend_id && tensor_id_copy(id, cur_backend_id, 0) == NULL && !supported) {
//printf("starting new split because of too many inputs: node %s, input %s\n", node->name, src->name); //printf("starting new split because of too many inputs: node %s, input %s\n", node->name, src->name);
need_new_split = true; need_new_split = true;
break; break;
@ -1570,12 +1570,12 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
continue; continue;
} }
const int src_backend_id = tensor_backend_id(src); size_t src_id = hash_id(src);
const int src_backend_id = sched->hv_tensor_backend_ids[src_id];
assert(src_backend_id != -1); // all inputs should be assigned by now assert(src_backend_id != -1); // all inputs should be assigned by now
if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) { if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) {
size_t id = hash_id(src); if (tensor_id_copy(src_id, src_backend_id, 0) == NULL) {
if (sched->tensor_copies[id][src_backend_id][0] == NULL) {
ggml_backend_t backend = sched->backends[src_backend_id]; ggml_backend_t backend = sched->backends[src_backend_id];
for (int c = 0; c < sched->n_copies; c++) { for (int c = 0; c < sched->n_copies; c++) {
struct ggml_tensor * tensor_copy; struct ggml_tensor * tensor_copy;
@ -1589,7 +1589,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
ggml_set_input(tensor_copy); ggml_set_input(tensor_copy);
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
} }
sched->tensor_copies[id][src_backend_id][c] = tensor_copy; tensor_id_copy(src_id, src_backend_id, c) = tensor_copy;
SET_CAUSE(tensor_copy, "4.cpy"); SET_CAUSE(tensor_copy, "4.cpy");
} }
int n_graph_inputs = sched->n_graph_inputs++; int n_graph_inputs = sched->n_graph_inputs++;
@ -1598,11 +1598,9 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
} }
} }
bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id); if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) {
if (src_backend_id != cur_backend_id && !supported) {
// create a copy of the input in the split's backend // create a copy of the input in the split's backend
const size_t id = hash_id(src); if (tensor_id_copy(src_id, cur_backend_id, 0) == NULL) {
if (sched->tensor_copies[id][cur_backend_id][0] == NULL) {
ggml_backend_t backend = sched->backends[cur_backend_id]; ggml_backend_t backend = sched->backends[cur_backend_id];
for (int c = 0; c < sched->n_copies; c++) { for (int c = 0; c < sched->n_copies; c++) {
struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src); struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
@ -1611,14 +1609,14 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
ggml_set_input(tensor_copy); ggml_set_input(tensor_copy);
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
} }
sched->tensor_copies[id][cur_backend_id][c] = tensor_copy; tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy;
SET_CAUSE(tensor_copy, "4.cpy"); SET_CAUSE(tensor_copy, "4.cpy");
} }
int n_inputs = split->n_inputs++; int n_inputs = split->n_inputs++;
GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS); GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
split->inputs[n_inputs] = src; split->inputs[n_inputs] = src;
} }
node->src[j] = sched->tensor_copies[id][cur_backend_id][sched->cur_copy]; node->src[j] = tensor_id_copy(src_id, cur_backend_id, sched->cur_copy);
} }
} }
} }
@ -1630,7 +1628,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
ggml_backend_sched_print_assignments(sched, graph); ggml_backend_sched_print_assignments(sched, graph);
} }
// swap node_backend_ids and leaf_backend_ids and prevs // swap node_backend_ids and leaf _backend_ids with prevs
{ {
int * tmp = sched->node_backend_ids; int * tmp = sched->node_backend_ids;
sched->node_backend_ids = sched->prev_node_backend_ids; sched->node_backend_ids = sched->prev_node_backend_ids;
@ -1641,9 +1639,19 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
sched->prev_leaf_backend_ids = tmp; sched->prev_leaf_backend_ids = tmp;
} }
// create copies of the graph for each split int graph_size = graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2;
// TODO: avoid this copy if (sched->graph.size < graph_size) {
struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2, false); sched->graph.size = graph_size;
sched->graph.nodes = realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
sched->graph.leafs = realloc(sched->graph.leafs, graph_size * sizeof(struct ggml_tensor *));
GGML_ASSERT(sched->graph.nodes != NULL);
GGML_ASSERT(sched->graph.leafs != NULL);
}
sched->graph.n_nodes = 0;
sched->graph.n_leafs = 0;
struct ggml_cgraph * graph_copy = &sched->graph;
for (int i = 0; i < sched->n_splits; i++) { for (int i = 0; i < sched->n_splits; i++) {
struct ggml_backend_sched_split * split = &sched->splits[i]; struct ggml_backend_sched_split * split = &sched->splits[i];
split->graph = ggml_graph_view(graph, split->i_start, split->i_end); split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
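Instead of building a fresh cgraph in the context on every split, the scheduler above now keeps sched->graph and only reallocates its nodes/leafs arrays when a larger graph comes in. A standalone sketch of that grow-only reuse pattern (hypothetical buf type, not the ggml structs):

#include <stdio.h>
#include <stdlib.h>

struct buf { int * data; int size; };

// grow the capacity if needed, otherwise keep the existing allocation
static void buf_require(struct buf * b, int needed) {
    if (b->size < needed) {
        b->data = realloc(b->data, needed * sizeof(int));
        b->size = needed;
    }
}

int main(void) {
    struct buf b = { NULL, 0 };
    buf_require(&b, 100);    // allocates
    buf_require(&b, 50);     // no-op: capacity is kept across uses
    buf_require(&b, 200);    // grows
    printf("%d\n", b.size);  // 200
    free(b.data);
    return 0;
}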
@ -1654,12 +1662,12 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
struct ggml_tensor * input = split->inputs[j]; struct ggml_tensor * input = split->inputs[j];
const size_t input_id = hash_id(input); const size_t input_id = hash_id(input);
struct ggml_tensor * input_cpy = sched->tensor_copies[input_id][split->backend_id][sched->cur_copy]; struct ggml_tensor * input_cpy = tensor_id_copy(input_id, split->backend_id, sched->cur_copy);
// add a dependency to the input source so that it is not freed before the copy is done // add a dependency to the input source so that it is not freed before the copy is done
struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input); struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input);
input_dep->src[0] = input; input_dep->src[0] = input;
sched->node_backend_ids[graph_copy->n_nodes] = sched->tensor_backend_id[input_id]; sched->node_backend_ids[graph_copy->n_nodes] = sched->hv_tensor_backend_ids[input_id];
graph_copy->nodes[graph_copy->n_nodes++] = input_dep; graph_copy->nodes[graph_copy->n_nodes++] = input_dep;
// add a dependency to the input copy so that it is allocated at the start of the split // add a dependency to the input copy so that it is allocated at the start of the split
@ -1681,7 +1689,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
size_t id = hash_id(input); size_t id = hash_id(input);
int backend_id = tensor_backend_id(input); int backend_id = tensor_backend_id(input);
for (int c = 0; c < sched->n_copies; c++) { for (int c = 0; c < sched->n_copies; c++) {
struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c]; struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c);
sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id; sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
graph_copy->leafs[graph_copy->n_leafs++] = input_cpy; graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
} }
@ -1694,7 +1702,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
struct ggml_tensor * input = split->inputs[j]; struct ggml_tensor * input = split->inputs[j];
size_t id = hash_id(input); size_t id = hash_id(input);
for (int c = 0; c < sched->n_copies; c++) { for (int c = 0; c < sched->n_copies; c++) {
struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c]; struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c);
sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id; sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
graph_copy->leafs[graph_copy->n_leafs++] = input_cpy; graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
} }
@ -1708,13 +1716,11 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf); sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf);
graph_copy->leafs[graph_copy->n_leafs++] = leaf; graph_copy->leafs[graph_copy->n_leafs++] = leaf;
} }
sched->graph = graph_copy;
} }
static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
bool backend_ids_changed = false; bool backend_ids_changed = false;
for (int i = 0; i < sched->graph->n_nodes; i++) { for (int i = 0; i < sched->graph.n_nodes; i++) {
if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i] && if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i] &&
sched->bufts[sched->node_backend_ids[i]] != sched->bufts[sched->prev_node_backend_ids[i]]) { sched->bufts[sched->node_backend_ids[i]] != sched->bufts[sched->prev_node_backend_ids[i]]) {
backend_ids_changed = true; backend_ids_changed = true;
@ -1722,7 +1728,7 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
} }
} }
if (!backend_ids_changed) { if (!backend_ids_changed) {
for (int i = 0; i < sched->graph->n_leafs; i++) { for (int i = 0; i < sched->graph.n_leafs; i++) {
if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i] && if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i] &&
sched->bufts[sched->leaf_backend_ids[i]] != sched->bufts[sched->prev_leaf_backend_ids[i]]) { sched->bufts[sched->leaf_backend_ids[i]] != sched->bufts[sched->prev_leaf_backend_ids[i]]) {
backend_ids_changed = true; backend_ids_changed = true;
@ -1732,14 +1738,14 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
} }
// allocate graph // allocate graph
if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) { if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
// the re-allocation may cause the split inputs to be moved to a different address // the re-allocation may cause the split inputs to be moved to a different address
ggml_backend_sched_synchronize(sched); ggml_backend_sched_synchronize(sched);
#ifndef NDEBUG #ifndef NDEBUG
fprintf(stderr, "%s: failed to allocate graph, reserving\n", __func__); fprintf(stderr, "%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
#endif #endif
ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids); ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) { if (!ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
fprintf(stderr, "%s: failed to allocate graph\n", __func__); fprintf(stderr, "%s: failed to allocate graph\n", __func__);
return false; return false;
} }
@ -1760,7 +1766,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
for (int j = 0; j < split->n_inputs; j++) { for (int j = 0; j < split->n_inputs; j++) {
ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]); ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]);
struct ggml_tensor * input = split->inputs[j]; struct ggml_tensor * input = split->inputs[j];
struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id][sched->cur_copy]; struct ggml_tensor * input_cpy = tensor_copy(input, split_backend_id, sched->cur_copy);
if (input->flags & GGML_TENSOR_FLAG_INPUT) { if (input->flags & GGML_TENSOR_FLAG_INPUT) {
// inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done // inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done
@ -1846,11 +1852,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
struct ggml_backend_sched * sched = calloc(1, sizeof(struct ggml_backend_sched)); struct ggml_backend_sched * sched = calloc(1, sizeof(struct ggml_backend_sched));
sched->debug = getenv("GGML_SCHED_DEBUG") != NULL; sched->debug = getenv("GGML_SCHED_DEBUG") != NULL;
sched->n_backends = n_backends;
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
// initialize hash table // initialize hash table
// FIXME: needs to be size*2 to account for leafs (do it in graph_split instead)
sched->hash_set = ggml_hash_set_new(graph_size); sched->hash_set = ggml_hash_set_new(graph_size);
sched->tensor_backend_id = calloc(sched->hash_set.size, sizeof(sched->tensor_backend_id[0])); sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
sched->tensor_copies = calloc(sched->hash_set.size, sizeof(sched->tensor_copies[0])); sched->hv_tensor_copies = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2; const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0])); sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
@ -1858,9 +1867,8 @@ ggml_backend_sched_t ggml_backend_sched_new(
sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0])); sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0])); sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
sched->n_backends = n_backends; sched->context_buffer_size = GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
sched->context_buffer = malloc(sched->context_buffer_size);
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
const int initial_splits_capacity = 16; const int initial_splits_capacity = 16;
sched->splits = calloc(initial_splits_capacity, sizeof(sched->splits[0])); sched->splits = calloc(initial_splits_capacity, sizeof(sched->splits[0]));
@ -1895,37 +1903,37 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
} }
ggml_gallocr_free(sched->galloc); ggml_gallocr_free(sched->galloc);
ggml_free(sched->ctx); ggml_free(sched->ctx);
ggml_hash_set_free(&sched->hash_set);
free(sched->splits); free(sched->splits);
free(sched->hash_set.keys); free(sched->hv_tensor_backend_ids);
free(sched->tensor_backend_id); free(sched->hv_tensor_copies);
free(sched->tensor_copies);
free(sched->node_backend_ids); free(sched->node_backend_ids);
free(sched->leaf_backend_ids); free(sched->leaf_backend_ids);
free(sched->prev_node_backend_ids); free(sched->prev_node_backend_ids);
free(sched->prev_leaf_backend_ids); free(sched->prev_leaf_backend_ids);
free(sched->context_buffer);
free(sched->graph.nodes);
free(sched->graph.leafs);
free(sched); free(sched);
} }
void ggml_backend_sched_reset(ggml_backend_sched_t sched) { void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
// reset state for the next run // reset state for the next run
if (!sched->is_reset) { if (!sched->is_reset) {
size_t hash_size = sched->hash_set.size; ggml_hash_set_reset(&sched->hash_set);
memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT memset(sched->hv_tensor_backend_ids, -1, sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size); memset(sched->hv_tensor_copies, 0, sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
sched->is_reset = true; sched->is_reset = true;
} }
sched->is_alloc = false; sched->is_alloc = false;
} }
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) { bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes); GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
ggml_backend_sched_split_graph(sched, measure_graph); ggml_backend_sched_split_graph(sched, measure_graph);
// TODO: extract this to a separate function if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
return false; return false;
} }
@ -1936,10 +1944,11 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
} }
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes); GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
ggml_backend_sched_split_graph(sched, graph); ggml_backend_sched_split_graph(sched, graph);
if (!ggml_backend_sched_alloc_splits(sched)) { if (!ggml_backend_sched_alloc_splits(sched)) {
return false; return false;
} }
@ -2009,6 +2018,7 @@ void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct gg
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
tensor_backend_id(node) = backend_index; tensor_backend_id(node) = backend_index;
SET_CAUSE(node, "usr"); SET_CAUSE(node, "usr");
sched->is_reset = false;
} }
ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) { ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
@ -2051,9 +2061,9 @@ static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set,
GGML_ASSERT(src != NULL); GGML_ASSERT(src != NULL);
GGML_ASSERT(src->data && "graph must be allocated"); GGML_ASSERT(src->data && "graph must be allocated");
size_t id = ggml_hash_insert(hash_set, src); size_t id = ggml_hash_insert(&hash_set, src);
if (id == GGML_HASHTABLE_ALREADY_EXISTS) { if (id == GGML_HASHSET_ALREADY_EXISTS) {
return node_copies[ggml_hash_find(hash_set, src)]; return node_copies[ggml_hash_find(&hash_set, src)];
} }
struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src); struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src);
@ -2078,7 +2088,7 @@ static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set,
return dst; return dst;
} }
static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) { static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) {
size_t id = ggml_hash_find(hash_set, src); size_t id = ggml_hash_find(hash_set, src);
if (node_init[id]) { if (node_init[id]) {
return; return;
@ -2105,10 +2115,7 @@ static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_te
} }
struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) { struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) {
struct ggml_hash_set hash_set = { struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size);
/* .size = */ graph->visited_hash_table.size,
/* .keys = */ calloc(graph->visited_hash_table.size, sizeof(hash_set.keys[0])) // NOLINT
};
struct ggml_tensor ** node_copies = calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT struct ggml_tensor ** node_copies = calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
bool * node_init = calloc(hash_set.size, sizeof(node_init[0])); bool * node_init = calloc(hash_set.size, sizeof(node_init[0]));
@ -2123,7 +2130,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s
if (ctx_allocated == NULL || ctx_unallocated == NULL) { if (ctx_allocated == NULL || ctx_unallocated == NULL) {
fprintf(stderr, "failed to allocate context for graph copy\n"); fprintf(stderr, "failed to allocate context for graph copy\n");
free(hash_set.keys); ggml_hash_set_free(&hash_set);
free(node_copies); free(node_copies);
free(node_init); free(node_init);
ggml_free(ctx_allocated); ggml_free(ctx_allocated);
@ -2146,7 +2153,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s
ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend); ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend);
if (buffer == NULL) { if (buffer == NULL) {
fprintf(stderr, "failed to allocate buffer for graph copy\n"); fprintf(stderr, "failed to allocate buffer for graph copy\n");
free(hash_set.keys); ggml_hash_set_free(&hash_set);
free(node_copies); free(node_copies);
free(node_init); free(node_init);
ggml_free(ctx_allocated); ggml_free(ctx_allocated);
@ -2164,19 +2171,19 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s
// copy data and init views // copy data and init views
for (int i = 0; i < graph->n_nodes; i++) { for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node = graph->nodes[i];
graph_copy_init_tensor(hash_set, node_copies, node_init, node); graph_copy_init_tensor(&hash_set, node_copies, node_init, node);
} }
// build graph copy // build graph copy
struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false); struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false);
for (int i = 0; i < graph->n_nodes; i++) { for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node = graph->nodes[i];
struct ggml_tensor * node_copy = node_copies[ggml_hash_find(hash_set, node)]; struct ggml_tensor * node_copy = node_copies[ggml_hash_find(&hash_set, node)];
graph_copy->nodes[i] = node_copy; graph_copy->nodes[i] = node_copy;
} }
graph_copy->n_nodes = graph->n_nodes; graph_copy->n_nodes = graph->n_nodes;
free(hash_set.keys); ggml_hash_set_free(&hash_set);
free(node_copies); free(node_copies);
free(node_init); free(node_init);

View file

@ -634,21 +634,121 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
#endif #endif
#define GGML_HASHTABLE_FULL ((size_t)-1) // bitset
#define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2)
static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
#define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
#define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
static size_t ggml_bitset_size(size_t n) {
return (n + BITSET_MASK) >> BITSET_SHR;
}
static inline bool ggml_bitset_get(const ggml_bitset_t * bitset, size_t i) {
return !!(bitset[i >> BITSET_SHR] & (1u << (i & BITSET_MASK)));
}
static inline void ggml_bitset_set(ggml_bitset_t * bitset, size_t i) {
bitset[i >> BITSET_SHR] |= (1u << (i & BITSET_MASK));
}
static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
bitset[i >> BITSET_SHR] &= ~(1u << (i & BITSET_MASK));
}
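A tiny standalone demo of the same word/bit arithmetic as the helpers above: bit i lives in 32-bit word i >> 5 at position i & 31.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    uint32_t bits[(100 + 31) >> 5];                       // 4 words are enough for 100 bits
    memset(bits, 0, sizeof(bits));

    size_t i = 70;
    bits[i >> 5] |= 1u << (i & 31);                       // set bit 70 (word 2, bit 6)
    printf("%d\n", !!(bits[i >> 5] & (1u << (i & 31))));  // 1
    bits[i >> 5] &= ~(1u << (i & 31));                    // clear it again
    printf("%d\n", !!(bits[i >> 5] & (1u << (i & 31))));  // 0
    return 0;
}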
// hash set
#define GGML_HASHSET_FULL ((size_t)-1)
#define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)
struct ggml_hash_set ggml_hash_set_new(size_t size); struct ggml_hash_set ggml_hash_set_new(size_t size);
void ggml_hash_set_free(struct ggml_hash_set * hash_set);
bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml_tensor * key); // returns the minimum size for a hash set that can hold min_sz elements
size_t ggml_hash_size(size_t min_sz);
// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted // remove all elements from the hash set
size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key); void ggml_hash_set_reset(struct ggml_hash_set * hash_set);
// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full // returns true if key is in the hash set
size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key); static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
// returns GGML_HASHSET_FULL if table is full, otherwise the current index of the key or where it should be inserted
static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
// returns GGML_HASHSET_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
// return index, asserts if table is full // return index, asserts if table is full
size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key); static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
// hash function for ggml_tensor
static inline size_t ggml_hash(const struct ggml_tensor * p) {
// the last 4 bits are always zero due to alignment
return (size_t)(uintptr_t)p >> 4;
}
static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
size_t h = ggml_hash(key) % hash_set->size;
// linear probing
size_t i = h;
while (ggml_bitset_get(hash_set->used, i) && hash_set->keys[i] != key) {
i = (i + 1) % hash_set->size;
if (i == h) {
// visited all hash table entries -> not found
return GGML_HASHSET_FULL;
}
}
return i;
}
static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
size_t i = ggml_hash_find(hash_set, key);
return i != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, i);
}
static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
size_t h = ggml_hash(key) % hash_set->size;
// linear probing
size_t i = h;
do {
if (!ggml_bitset_get(hash_set->used, i)) {
ggml_bitset_set(hash_set->used, i);
hash_set->keys[i] = key;
return i;
}
if (hash_set->keys[i] == key) {
return GGML_HASHSET_ALREADY_EXISTS;
}
i = (i + 1) % hash_set->size;
} while (i != h);
// visited all hash table entries -> not found
GGML_ASSERT(false);
}
static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
size_t h = ggml_hash(key) % hash_set->size;
// linear probing
size_t i = h;
do {
if (!ggml_bitset_get(hash_set->used, i)) {
ggml_bitset_set(hash_set->used, i);
hash_set->keys[i] = key;
return i;
}
if (hash_set->keys[i] == key) {
return i;
}
i = (i + 1) % hash_set->size;
} while (i != h);
// visited all hash table entries -> not found
GGML_ASSERT(false);
}
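A short usage sketch of the reworked API, assuming the declarations above are in scope and graph is any built ggml_cgraph (error handling omitted):

static int count_unique_nodes(const struct ggml_cgraph * graph) {
    struct ggml_hash_set set = ggml_hash_set_new(graph->n_nodes);

    int n_unique = 0;
    for (int i = 0; i < graph->n_nodes; i++) {
        if (ggml_hash_insert(&set, graph->nodes[i]) != GGML_HASHSET_ALREADY_EXISTS) {
            n_unique++;
        }
    }

    // reset is now just a memset of the used bitset; the stale keys are simply ignored
    ggml_hash_set_reset(&set);

    ggml_hash_set_free(&set);
    return n_unique;
}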
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -14623,7 +14623,7 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
} }
if (nbytes % ggml_type_size(type) != 0) { if (nbytes % ggml_type_size(type) != 0) {
fprintf(stderr, "%s: invalid size %zu for type %d\n", __func__, nbytes, type); fprintf(stderr, "%s: invalid size %zu for type %s (type size = %zu)\n", __func__, nbytes, ggml_type_name(type), ggml_type_size(type));
return false; return false;
} }

View file

@ -141,23 +141,25 @@ typedef pthread_t ggml_thread_t;
#include <sys/wait.h> #include <sys/wait.h>
void ggml_print_backtrace(void) { #if defined(__linux__)
/* #include <execinfo.h>
#include <execinfo.h> static void ggml_print_backtrace_symbols(void) {
#include <dlfcn.h>
void * trace[100]; void * trace[100];
int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0])); int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
backtrace_symbols_fd(trace, nptrs, STDERR_FILENO); backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
*/ }
#else
static void ggml_print_backtrace_symbols(void) {
// platform not supported
}
#endif
// backtrack_symbols does not show line numbers, use gdb instead static void ggml_print_backtrace(void) {
char attach[32]; char attach[32];
snprintf(attach, sizeof(attach), "attach %d", getpid()); snprintf(attach, sizeof(attach), "attach %d", getpid());
int pid = fork(); int pid = fork();
if (pid == 0) { if (pid == 0) {
// try gdb
execlp("gdb", "gdb", "--batch", execlp("gdb", "gdb", "--batch",
"-ex", "set style enabled on", "-ex", "set style enabled on",
"-ex", attach, "-ex", attach,
@ -165,16 +167,37 @@ void ggml_print_backtrace(void) {
"-ex", "detach", "-ex", "detach",
"-ex", "quit", "-ex", "quit",
(char *) NULL); (char *) NULL);
// try lldb
execlp("lldb", "lldb", "--batch",
"-o", "bt",
"-o", "quit",
"-p", attach,
(char *) NULL);
exit(EXIT_FAILURE);
} else { } else {
waitpid(pid, NULL, 0); int wstatus;
waitpid(pid, &wstatus, 0);
if (WIFEXITED(wstatus)) {
if (WEXITSTATUS(wstatus) == EXIT_FAILURE) {
// gdb failed, fallback to backtrace_symbols
ggml_print_backtrace_symbols();
}
}
} }
} }
#else #else
void ggml_print_backtrace(void) { static void ggml_print_backtrace(void) {
// platform not supported // platform not supported
} }
#endif #endif
void ggml_abort(const char * file, int line, const char * expr) {
fflush(stdout);
fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", file, line, expr);
ggml_print_backtrace();
abort();
}
#define GGML_DEBUG 0 #define GGML_DEBUG 0
#define GGML_GELU_FP16 #define GGML_GELU_FP16
#define GGML_GELU_QUICK_FP16 #define GGML_GELU_QUICK_FP16
@ -3372,7 +3395,7 @@ static inline int ggml_up(int n, int m) {
} }
// assert that pointer is aligned to GGML_MEM_ALIGN // assert that pointer is aligned to GGML_MEM_ALIGN
#define ggml_assert_aligned(ptr) \ #define GGML_ASSERT_ALIGNED(ptr) \
GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0) GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -3473,7 +3496,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
GGML_ASSERT(ctx->mem_buffer != NULL); GGML_ASSERT(ctx->mem_buffer != NULL);
ggml_assert_aligned(ctx->mem_buffer); GGML_ASSERT_ALIGNED(ctx->mem_buffer);
GGML_PRINT_DEBUG("%s: context initialized\n", __func__); GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
@ -3605,7 +3628,7 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
.type = type, .type = type,
}; };
ggml_assert_aligned(mem_buffer + obj_new->offs); GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
if (obj_cur != NULL) { if (obj_cur != NULL) {
obj_cur->next = obj_new; obj_cur->next = obj_new;
@ -3706,7 +3729,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
#endif #endif
// TODO: this should not be needed as long as we don't rely on aligned SIMD loads // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
//ggml_assert_aligned(result->data); //GGML_ASSERT_ALIGNED(result->data);
for (int i = 0; i < n_dims; i++) { for (int i = 0; i < n_dims; i++) {
result->ne[i] = ne[i]; result->ne[i] = ne[i];
@ -4270,8 +4293,11 @@ const char * ggml_get_name(const struct ggml_tensor * tensor) {
} }
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) { struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
strncpy(tensor->name, name, sizeof(tensor->name) - 1); size_t i;
tensor->name[sizeof(tensor->name) - 1] = '\0'; for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
tensor->name[i] = name[i];
}
tensor->name[i] = '\0';
return tensor; return tensor;
} }
@ -16963,7 +16989,25 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
static size_t ggml_hash_size(size_t min_sz) { struct ggml_hash_set ggml_hash_set_new(size_t size) {
size = ggml_hash_size(size);
struct ggml_hash_set result;
result.size = size;
result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
return result;
}
void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
}
void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
GGML_FREE(hash_set->used);
GGML_FREE(hash_set->keys);
}
size_t ggml_hash_size(size_t min_sz) {
// next primes after powers of two // next primes after powers of two
static const size_t primes[] = { static const size_t primes[] = {
2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031, 2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
@ -16974,7 +17018,7 @@ static size_t ggml_hash_size(size_t min_sz) {
}; };
static const size_t n_primes = sizeof(primes)/sizeof(primes[0]); static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
// find the smallest prime that is larger or equal to min_sz // find the smallest prime that is larger or equal than min_sz
size_t l = 0; size_t l = 0;
size_t r = n_primes; size_t r = n_primes;
while (l < r) { while (l < r) {
@ -16989,67 +17033,6 @@ static size_t ggml_hash_size(size_t min_sz) {
return sz; return sz;
} }
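With the table above, ggml_hash_size(600) and ggml_hash_size(1000) both return 1031 (the smallest listed prime greater than or equal to min_sz), and ggml_hash_size(521) returns 521 itself.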
static size_t ggml_hash(const void * p) {
return (size_t)p;
}
size_t ggml_hash_find(const struct ggml_hash_set hash_set, struct ggml_tensor * key) {
size_t h = ggml_hash(key) % hash_set.size;
// linear probing
size_t i = h;
while (hash_set.keys[i] != NULL && hash_set.keys[i] != key) {
i = (i + 1) % hash_set.size;
if (i == h) {
// visited all hash table entries -> not found
return GGML_HASHTABLE_FULL;
}
}
return i;
}
bool ggml_hash_contains(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
size_t i = ggml_hash_find(hash_set, key);
return i != GGML_HASHTABLE_FULL && hash_set.keys[i] == key;
}
size_t ggml_hash_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
size_t i = ggml_hash_find(hash_set, key);
GGML_ASSERT(i != GGML_HASHTABLE_FULL);
if (hash_set.keys[i] == key) {
return GGML_HASHTABLE_ALREADY_EXISTS;
}
// insert
GGML_ASSERT(hash_set.keys[i] == NULL);
hash_set.keys[i] = key;
return i;
}
size_t ggml_hash_find_or_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
size_t i = ggml_hash_find(hash_set, key);
GGML_ASSERT(i != GGML_HASHTABLE_FULL);
hash_set.keys[i] = key;
return i;
}
struct ggml_hash_set ggml_hash_set_new(size_t size) {
size = ggml_hash_size(size);
struct ggml_hash_set result;
result.size = size;
result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
return result;
}
static void ggml_hash_set_free(struct ggml_hash_set hash_set) {
GGML_FREE(hash_set.keys);
}
struct hash_map { struct hash_map {
struct ggml_hash_set set; struct ggml_hash_set set;
struct ggml_tensor ** vals; struct ggml_tensor ** vals;
@ -17058,13 +17041,12 @@ struct hash_map {
static struct hash_map * ggml_new_hash_map(size_t size) { static struct hash_map * ggml_new_hash_map(size_t size) {
struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map)); struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
result->set = ggml_hash_set_new(size); result->set = ggml_hash_set_new(size);
result->vals = GGML_MALLOC(sizeof(struct ggml_tensor *) * result->set.size); result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
return result; return result;
} }
static void ggml_hash_map_free(struct hash_map * map) { static void ggml_hash_map_free(struct hash_map * map) {
ggml_hash_set_free(map->set); ggml_hash_set_free(&map->set);
GGML_FREE(map->vals); GGML_FREE(map->vals);
GGML_FREE(map); GGML_FREE(map);
} }
@ -17085,7 +17067,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
return node; return node;
} }
if (!ggml_hash_contains(graph->visited_hash_table, node)) { if (!ggml_hash_contains(&graph->visited_hash_set, node)) {
return node; return node;
} }
@ -17100,8 +17082,8 @@ static struct ggml_tensor * ggml_recompute_graph_node(
return node; return node;
} }
size_t i = ggml_hash_find(replacements->set, node); size_t i = ggml_hash_find(&replacements->set, node);
GGML_ASSERT(i != GGML_HASHTABLE_FULL); // assert that not full GGML_ASSERT(i != GGML_HASHSET_FULL); // assert that not full
if (replacements->set.keys[i] == node) { if (replacements->set.keys[i] == node) {
return replacements->vals[i]; return replacements->vals[i];
} }
@ -17159,8 +17141,8 @@ void ggml_build_backward_gradient_checkpointing(
// insert checkpoints in replacements // insert checkpoints in replacements
for (int i = 0; i < n_checkpoints; ++i) { for (int i = 0; i < n_checkpoints; ++i) {
size_t k = ggml_hash_find(replacements->set, checkpoints[i]); size_t k = ggml_hash_find(&replacements->set, checkpoints[i]);
GGML_ASSERT(k != GGML_HASHTABLE_FULL); // assert that not full GGML_ASSERT(k != GGML_HASHSET_FULL); // assert that not full
GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite
replacements->set.keys[k] = checkpoints[i]; replacements->set.keys[k] = checkpoints[i];
replacements->vals[k] = checkpoints[i]; replacements->vals[k] = checkpoints[i];
@ -17188,7 +17170,7 @@ void ggml_build_backward_gradient_checkpointing(
// functions to change gradients considering the case that input a might be initial gradient with zero value // functions to change gradients considering the case that input a might be initial gradient with zero value
static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) { static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) { if (ggml_hash_contains(zero_table, a)) {
return b; return b;
} else { } else {
@ -17196,7 +17178,7 @@ static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct gg
} }
} }
static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set zero_table) { static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) { if (ggml_hash_contains(zero_table, a)) {
struct ggml_tensor * a_zero = ggml_scale(ctx, a, 0.0f); struct ggml_tensor * a_zero = ggml_scale(ctx, a, 0.0f);
return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false); return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false);
@ -17205,7 +17187,7 @@ static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct gg
} }
} }
static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) { static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) { if (ggml_hash_contains(zero_table, a)) {
return ggml_repeat(ctx, b, a); return ggml_repeat(ctx, b, a);
} else { } else {
@ -17213,7 +17195,7 @@ static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct g
} }
} }
static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) { static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) { if (ggml_hash_contains(zero_table, a)) {
return ggml_neg(ctx, b); return ggml_neg(ctx, b);
} else { } else {
@ -17221,7 +17203,7 @@ static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct gg
} }
} }
static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set zero_table) { static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set * zero_table) {
struct ggml_tensor * src0 = tensor->src[0]; struct ggml_tensor * src0 = tensor->src[0];
struct ggml_tensor * src1 = tensor->src[1]; struct ggml_tensor * src1 = tensor->src[1];
struct ggml_tensor * src2 = tensor->src[2]; struct ggml_tensor * src2 = tensor->src[2];
@ -18049,7 +18031,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
} }
// check if already visited // check if already visited
if (ggml_hash_insert(cgraph->visited_hash_table, node) == GGML_HASHTABLE_ALREADY_EXISTS) { if (ggml_hash_insert(&cgraph->visited_hash_set, node) == GGML_HASHSET_ALREADY_EXISTS) {
return; return;
} }
@ -18131,7 +18113,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size); struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size);
for (int i = 0; i < gf->n_nodes; i++) { for (int i = 0; i < gf->n_nodes; i++) {
if (gf->grads[i]) { if (gf->grads[i]) {
ggml_hash_insert(zero_table, gf->grads[i]); ggml_hash_insert(&zero_table, gf->grads[i]);
} }
} }
@ -18141,7 +18123,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
// inplace operations to add gradients are not created by ggml_compute_backward // inplace operations to add gradients are not created by ggml_compute_backward
// use allocator to automatically make inplace operations // use allocator to automatically make inplace operations
if (node->grad) { if (node->grad) {
ggml_compute_backward(ctx, node, zero_table); ggml_compute_backward(ctx, node, &zero_table);
} }
} }
@ -18154,16 +18136,29 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
} }
} }
ggml_hash_set_free(zero_table); ggml_hash_set_free(&zero_table);
}
static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
void * ptr = *p;
ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
*p = (void *) ((char *) ptr + size);
return ptr;
} }
static size_t ggml_graph_nbytes(size_t size, bool grads) { static size_t ggml_graph_nbytes(size_t size, bool grads) {
size_t nbytes = sizeof(struct ggml_cgraph); size_t hash_size = ggml_hash_size(size * 2);
nbytes += size * sizeof(struct ggml_tensor *) * 2; // leafs + nodes void * p = 0;
incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
if (grads) { if (grads) {
nbytes += size * sizeof(struct ggml_tensor *); // grads incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
} }
nbytes += ggml_hash_size(size * 2) * sizeof(struct ggml_tensor *); // hash set incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
size_t nbytes = (size_t) p;
return nbytes; return nbytes;
} }
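ggml_graph_nbytes runs the cursor over the whole layout starting from a NULL base, so the final cursor value is exactly the number of bytes needed, alignment padding included; ggml_new_graph_custom then replays the same sequence of incr_ptr_aligned calls over the real allocation. A small standalone example of this measure-then-carve pattern (the helper is re-declared so the snippet compiles on its own; the array sizes are arbitrary):

// measure-then-carve: run the layout once from a NULL base to size it, then again over the buffer
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
    uintptr_t ptr = (uintptr_t) *p;
    ptr = (ptr + align - 1) & ~(uintptr_t)(align - 1);   // round the cursor up to a multiple of align
    *p = (void *) (ptr + size);                          // advance past the reserved bytes
    return (void *) ptr;
}

int main(void) {
    // pass 1: measure, starting at 0 so the final cursor value equals the total size
    void * p = NULL;
    incr_ptr_aligned(&p, 3 * sizeof(char),   1);              // 3 bytes, unaligned
    incr_ptr_aligned(&p, 4 * sizeof(double), sizeof(double)); // padded 3 -> 8, then 32 bytes
    size_t total = (size_t) p;
    assert(total == 40);

    // pass 2: carve the same layout out of one real allocation
    void * buf = malloc(total);
    p = buf;
    char   * a = incr_ptr_aligned(&p, 3 * sizeof(char),   1);
    double * b = incr_ptr_aligned(&p, 4 * sizeof(double), sizeof(double));
    assert((size_t)((char *) p - (char *) buf) == total);
    a[0] = 'x';
    b[0] = 1.0;
    free(buf);
    return 0;
}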
@ -18180,19 +18175,19 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size); struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs); struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1); // the size of the hash table is doubled since it needs to hold both nodes and leafs
size_t hash_size = ggml_hash_size(size * 2); size_t hash_size = ggml_hash_size(size * 2);
struct ggml_tensor ** nodes_ptr = data_start;
struct ggml_tensor ** leafs_ptr = nodes_ptr + size; void * p = cgraph + 1;
struct ggml_tensor ** hash_keys_ptr = leafs_ptr + size;
struct ggml_tensor ** grads_ptr = grads ? hash_keys_ptr + hash_size : NULL; struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
// check that we allocated the correct amount of memory // check that we allocated the correct amount of memory
assert(obj_size == (size_t) ( assert(obj_size == (size_t)((char *)p - (char *)cgraph));
(grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph));
memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));
*cgraph = (struct ggml_cgraph) { *cgraph = (struct ggml_cgraph) {
/*.size =*/ size, /*.size =*/ size,
@ -18201,10 +18196,12 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
/*.nodes =*/ nodes_ptr, /*.nodes =*/ nodes_ptr,
/*.grads =*/ grads_ptr, /*.grads =*/ grads_ptr,
/*.leafs =*/ leafs_ptr, /*.leafs =*/ leafs_ptr,
/*.hash_table =*/ { hash_size, hash_keys_ptr }, /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT, /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
}; };
ggml_hash_set_reset(&cgraph->visited_hash_set);
return cgraph; return cgraph;
} }
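For context, a minimal usage sketch of the constructor; the calls are standard ggml API and the sizes are arbitrary. Everything the graph needs — node and leaf arrays, optional grads, hash keys and the used bitset — lives in the single object carved out of the context buffer above.

// minimal usage sketch: build a custom-sized graph with gradient slots
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,       // tensor data is allocated elsewhere (e.g. by a backend)
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, /*size=*/ 2048, /*grads=*/ true);

    // the visited hash set is sized for both nodes and leafs, and its used bitset starts cleared
    GGML_ASSERT(gf->visited_hash_set.size >= 2 * 2048);

    ggml_free(ctx);
    return 0;
}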
@ -18220,7 +18217,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
/*.nodes =*/ cgraph0->nodes + i0, /*.nodes =*/ cgraph0->nodes + i0,
/*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL, /*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
/*.leafs =*/ NULL, /*.leafs =*/ NULL,
/*.hash_table =*/ { 0, NULL }, /*.hash_table =*/ { 0, NULL, NULL },
/*.order =*/ cgraph0->order, /*.order =*/ cgraph0->order,
}; };
@ -18230,7 +18227,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) { void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
GGML_ASSERT(dst->size >= src->n_leafs); GGML_ASSERT(dst->size >= src->n_leafs);
GGML_ASSERT(dst->size >= src->n_nodes); GGML_ASSERT(dst->size >= src->n_nodes);
GGML_ASSERT(dst->visited_hash_table.size >= src->visited_hash_table.size); GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
dst->n_leafs = src->n_leafs; dst->n_leafs = src->n_leafs;
dst->n_nodes = src->n_nodes; dst->n_nodes = src->n_nodes;
@ -18251,9 +18248,9 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
} }
} }
for (size_t i = 0; i < src->visited_hash_table.size; ++i) { for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
if (src->visited_hash_table.keys[i]) { if (src->visited_hash_set.keys[i]) {
ggml_hash_insert(dst->visited_hash_table, src->visited_hash_table.keys[i]); ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
} }
} }
} }
@ -18279,7 +18276,7 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
void ggml_graph_clear(struct ggml_cgraph * cgraph) { void ggml_graph_clear(struct ggml_cgraph * cgraph) {
cgraph->n_leafs = 0; cgraph->n_leafs = 0;
cgraph->n_nodes = 0; cgraph->n_nodes = 0;
memset(cgraph->visited_hash_table.keys, 0, cgraph->visited_hash_table.size * sizeof(struct ggml_tensor *)); ggml_hash_set_reset(&cgraph->visited_hash_set);
} }
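This is where the reduced reset cost shows up: clearing a graph used to memset the full array of key pointers, while it now only zeroes the used bitset, roughly a 64x reduction in bytes written on a 64-bit build. A back-of-the-envelope sketch (the hash size is an assumed example value, not measured output):

// rough arithmetic for a default-sized graph (2048 nodes); numbers are illustrative
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

int main(void) {
    size_t hash_size = 4099;                                        // assumed: a prime >= 2 * 2048
    size_t old_reset = hash_size * sizeof(void *);                  // old: memset over all key pointers
    size_t new_reset = ((hash_size + 31) / 32) * sizeof(uint32_t);  // new: memset over the used bitset
    printf("old: %zu bytes cleared, new: %zu bytes cleared\n", old_reset, new_reset);
    // on a 64-bit build this is ~32 KiB vs ~0.5 KiB for every graph clear/rebuild
    return 0;
}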
// //