fixed runtime bugs and compile errors related to GGML_PERF and GGML_DEBUG

commit 218ab9ef89 (parent 32c5411631)
Author: mqy
Date:   2023-07-14 20:21:53 +08:00

ggml.c (22 changed lines)

@@ -4411,7 +4411,7 @@ void ggml_free(struct ggml_context * ctx) {
             g_state.contexts[i].used = false;

-            GGML_PRINT_DEBUG("%s: context %d with %d objects has been freed. memory used = %zu\n",
-                    __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
+            GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
+                    __func__, i, ggml_used_mem(ctx));

             if (ctx->mem_buffer_owned) {
                 GGML_ALIGNED_FREE(ctx->mem_buffer);
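Note: the old print dereferenced ctx->objects_end unconditionally, which crashes under GGML_DEBUG whenever a context is freed before any object was allocated (objects_end is still NULL). ggml_used_mem() is the existing accessor that handles that case; a sketch of the null-safe check it performs (body paraphrased from upstream ggml.c, not taken from this commit):

// Paraphrase of upstream ggml_used_mem(), shown only to explain the fix.
// objects_end stays NULL until the first object is allocated, so the old
// debug print could dereference a null pointer when freeing an empty context.
size_t ggml_used_mem(const struct ggml_context * ctx) {
    return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
}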
@@ -16249,6 +16249,7 @@ struct ggml_compute_state {
     struct ggml_compute_state_shared * shared;
 };

+#ifdef GGML_PERF
 static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
     int64_t cycles_cur  = ggml_perf_cycles()  - st->perf_node_start_cycles;
     int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;
@@ -16257,6 +16258,7 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const
     node->perf_cycles  += cycles_cur;
     node->perf_time_us += time_us_cur;
 }
+#endif

 static thread_ret_t ggml_graph_compute_thread(void * data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
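With the helper now compiled only under GGML_PERF, plain builds no longer contain a static function whose every caller is compiled out; that is a classic -Wunused-function warning (fatal under -Werror) and a likely source of the "compile errors" in the commit title. The guard pattern in isolation, with a hypothetical helper name; a no-op fallback is one way to spare call sites their own #ifdef, which this commit instead adds by hand:

#ifdef GGML_PERF
// Perf builds: accumulate per-node counters (perf_runs/perf_cycles/perf_time_us
// are the real ggml_tensor fields; the helper name here is illustrative).
static void perf_stats_node(struct ggml_tensor * node,
                            const struct ggml_compute_state_shared * st) {
    node->perf_runs++;
    node->perf_cycles  += ggml_perf_cycles()  - st->perf_node_start_cycles;
    node->perf_time_us += ggml_perf_time_us() - st->perf_node_start_time_us;
}
#else
// Alternative design: a no-op stub keeps every call site guard-free.
#define perf_stats_node(node, st) ((void) 0)
#endif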
@@ -16293,8 +16295,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 if (GGML_OP_HAS_FINALIZE[node->op]) {
                     params.nth = n_tasks_arr[node_n];
                     ggml_compute_forward(&params, node);
-                    ggml_graph_compute_perf_stats_node(node, state->shared);
                 }
+#ifdef GGML_PERF
+                ggml_graph_compute_perf_stats_node(node, state->shared);
+#endif
             }

             // distribute new work or execute it direct if 1T
@@ -16303,10 +16307,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 struct ggml_tensor * node = cgraph->nodes[node_n];
                 const int n_tasks = n_tasks_arr[node_n];
-
+#ifdef GGML_PERF
                 state->shared->perf_node_start_cycles  = ggml_perf_cycles();
                 state->shared->perf_node_start_time_us = ggml_perf_time_us();
-
+#endif
                 params.nth = n_tasks;

                 /* INIT */
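Upstream ggml defines ggml_perf_cycles() and ggml_perf_time_us() as macros that expand to 0 when GGML_PERF is off, so these two assignments would normally compile either way; guarding them only makes sense if this branch also compiles the perf_node_start_* fields out of the shared state. A sketch of that assumed layout (the field guard is an inference, not taken from the diff):

// Assumed on this branch: the perf fields exist only in GGML_PERF builds,
// so any unguarded reference to them is a compile error.
struct ggml_compute_state_shared {
    int n_threads;                    // stand-in for the real sync members
#ifdef GGML_PERF
    int64_t perf_node_start_cycles;   // set before each node is dispatched
    int64_t perf_node_start_time_us;
#endif
};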
@@ -16324,8 +16328,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                     if (GGML_OP_HAS_FINALIZE[node->op]) {
                         params.type = GGML_TASK_FINALIZE;
                         ggml_compute_forward(&params, node);
-                        ggml_graph_compute_perf_stats_node(node, state->shared);
                     }
+#ifdef GGML_PERF
+                    ggml_graph_compute_perf_stats_node(node, state->shared);
+#endif
                 } else {
                     break;
                 }
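This hunk and the earlier FINALIZE hunk fix the same runtime bug: the stats call sat inside the GGML_OP_HAS_FINALIZE branch, so nodes whose op has no finalize pass were never timed and GGML_PERF reports silently under-counted. Condensed, the corrected control flow is:

// After the fix (names from the diff, surrounding thread logic elided):
// every completed node is timed, not only ops that have a FINALIZE pass.
if (GGML_OP_HAS_FINALIZE[node->op]) {
    params.type = GGML_TASK_FINALIZE;
    ggml_compute_forward(&params, node);
}
#ifdef GGML_PERF
ggml_graph_compute_perf_stats_node(node, state->shared);   // unconditional
#endif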
@@ -16864,9 +16870,6 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
 }

 void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
-    //assert(cgraph->work == NULL);
-    //assert(cgraph->work_size == 0);
-
     uint64_t size_eval = 0;

     // compute size of intermediate results
@@ -17305,9 +17308,6 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
     GGML_PRINT("=== GRAPH ===\n");

-    GGML_PRINT_DEBUG("n_threads = %d\n", cgraph->n_threads);
-    GGML_PRINT_DEBUG("total work size = %zu bytes\n", cgraph->work_size);
-
     GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes);
     for (int i = 0; i < cgraph->n_nodes; i++) {
         struct ggml_tensor * node = cgraph->nodes[i];
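The deletions in the last two hunks track an API change rather than a logic fix: ggml_cgraph had just lost its work, work_size, and n_threads members when graph planning moved into ggml_cplan (the ggml_graph_plan()/ggml_graph_compute() split merged into llama.cpp shortly before this commit), so these GGML_DEBUG-only lines no longer compiled. If the same output were still wanted, it would have to come from the caller's plan; a rough sketch against the new API, not part of this commit (graph and n_threads stand for the caller's variables):

// Hypothetical replacement using the post-refactor API: thread count and
// work-buffer size now live in the caller-built ggml_cplan.
struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
GGML_PRINT_DEBUG("n_threads       = %d\n",        plan.n_threads);
GGML_PRINT_DEBUG("total work size = %zu bytes\n", plan.work_size);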