diff --git a/ggml.c b/ggml.c
index c137ae658..174b5d3f5 100644
--- a/ggml.c
+++ b/ggml.c
@@ -4411,8 +4411,8 @@ void ggml_free(struct ggml_context * ctx) {
             g_state.contexts[i].used = false;

-            GGML_PRINT_DEBUG("%s: context %d with %d objects has been freed. memory used = %zu\n",
-                    __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
+            GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
+                    __func__, i, ggml_used_mem(ctx));

             if (ctx->mem_buffer_owned) {
                 GGML_ALIGNED_FREE(ctx->mem_buffer);
@@ -16249,6 +16249,7 @@ struct ggml_compute_state {
     struct ggml_compute_state_shared * shared;
 };

+#ifdef GGML_PERF
 static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
     int64_t cycles_cur  = ggml_perf_cycles()  - st->perf_node_start_cycles;
     int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;
@@ -16257,6 +16258,7 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const
     node->perf_cycles  += cycles_cur;
     node->perf_time_us += time_us_cur;
 }
+#endif

 static thread_ret_t ggml_graph_compute_thread(void * data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
@@ -16293,8 +16295,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 if (GGML_OP_HAS_FINALIZE[node->op]) {
                     params.nth = n_tasks_arr[node_n];
                     ggml_compute_forward(&params, node);
-                    ggml_graph_compute_perf_stats_node(node, state->shared);
                 }
+#ifdef GGML_PERF
+                ggml_graph_compute_perf_stats_node(node, state->shared);
+#endif
             }

             // distribute new work or execute it direct if 1T
@@ -16303,10 +16307,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 struct ggml_tensor * node = cgraph->nodes[node_n];

                 const int n_tasks = n_tasks_arr[node_n];
-
+#ifdef GGML_PERF
                 state->shared->perf_node_start_cycles  = ggml_perf_cycles();
                 state->shared->perf_node_start_time_us = ggml_perf_time_us();
-
+#endif
                 params.nth = n_tasks;

                 /* INIT */
@@ -16324,8 +16328,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                     if (GGML_OP_HAS_FINALIZE[node->op]) {
                         params.type = GGML_TASK_FINALIZE;
                         ggml_compute_forward(&params, node);
-                        ggml_graph_compute_perf_stats_node(node, state->shared);
                     }
+#ifdef GGML_PERF
+                    ggml_graph_compute_perf_stats_node(node, state->shared);
+#endif
                 } else {
                     break;
                 }
@@ -16864,9 +16870,6 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
 }

 void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
-    //assert(cgraph->work      == NULL);
-    //assert(cgraph->work_size == 0);
-
     uint64_t size_eval = 0;

     // compute size of intermediate results
@@ -17305,9 +17308,6 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {

     GGML_PRINT("=== GRAPH ===\n");

-    GGML_PRINT_DEBUG("n_threads       = %d\n",        cgraph->n_threads);
-    GGML_PRINT_DEBUG("total work size = %zu bytes\n", cgraph->work_size);
-
     GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes);
     for (int i = 0; i < cgraph->n_nodes; i++) {
         struct ggml_tensor * node = cgraph->nodes[i];
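
Note on the ggml_free() hunk: ggml_used_mem() is the public accessor for exactly the quantity the old debug print computed inline (objects_end->offs + objects_end->size), and unlike the inline expression it returns 0 for a context that holds no objects instead of dereferencing a NULL objects_end. Below is a minimal sketch of the accessor from the API side; it is not part of the patch, the arena size and tensor shapes are arbitrary, and GGML_PERF (which after this patch also gates the per-node perf counters) is a plain macro that would typically be enabled with -DGGML_PERF at compile time.

    // build (roughly): cc -o used_mem used_mem.c ggml.c -lm -lpthread
    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params ip = {
            /* .mem_size   = */ 16*1024*1024,  // 16 MiB arena (arbitrary)
            /* .mem_buffer = */ NULL,          // let ggml allocate and own the buffer
            /* .no_alloc   = */ false,
        };

        struct ggml_context * ctx = ggml_init(ip);

        printf("used before: %zu bytes\n", ggml_used_mem(ctx)); // 0: no objects yet

        // each object allocated in the context advances objects_end
        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
        struct ggml_tensor * c = ggml_add(ctx, a, b);
        (void) c;

        // offs + size of the last object, i.e. what the removed expression spelled out
        printf("used after:  %zu bytes\n", ggml_used_mem(ctx));

        // when debug logging is enabled, this now reports usage via the same helper
        ggml_free(ctx);
        return 0;
    }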