fixed runtime bugs and compile errors related to GGML_PERF and GGML_DEBUG
parent 32c5411631
commit 218ab9ef89

1 changed file with 11 additions and 11 deletions

ggml.c (+11/-11)
@@ -4411,7 +4411,7 @@ void ggml_free(struct ggml_context * ctx) {
             g_state.contexts[i].used = false;
 
             GGML_PRINT_DEBUG("%s: context %d with %d objects has been freed. memory used = %zu\n",
-                    __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
+                    __func__, i, ggml_used_mem(ctx));
 
             if (ctx->mem_buffer_owned) {
                 GGML_ALIGNED_FREE(ctx->mem_buffer);
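Note: the removed argument list dereferences ctx->objects_end unconditionally, which is NULL for a context that is freed without ever allocating an object, so the debug print itself could crash; this is presumably one of the runtime bugs named in the title. ggml_used_mem() performs the same size computation behind a NULL check; its definition elsewhere in ggml.c is approximately:

    size_t ggml_used_mem(const struct ggml_context * ctx) {
        // 0 for an empty context instead of dereferencing a NULL objects_end
        return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
    }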
@@ -16249,6 +16249,7 @@ struct ggml_compute_state {
     struct ggml_compute_state_shared * shared;
 };
 
+#ifdef GGML_PERF
 static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
     int64_t cycles_cur  = ggml_perf_cycles()  - st->perf_node_start_cycles;
     int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;
@@ -16257,6 +16258,7 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const
     node->perf_cycles  += cycles_cur;
     node->perf_time_us += time_us_cur;
 }
+#endif
 
 static thread_ret_t ggml_graph_compute_thread(void * data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
@@ -16293,8 +16295,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 if (GGML_OP_HAS_FINALIZE[node->op]) {
                     params.nth = n_tasks_arr[node_n];
                     ggml_compute_forward(&params, node);
-                    ggml_graph_compute_perf_stats_node(node, state->shared);
                 }
+#ifdef GGML_PERF
+                ggml_graph_compute_perf_stats_node(node, state->shared);
+#endif
             }
 
             // distribute new work or execute it direct if 1T
@@ -16303,10 +16307,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 
                 struct ggml_tensor * node = cgraph->nodes[node_n];
                 const int n_tasks = n_tasks_arr[node_n];
-
+#ifdef GGML_PERF
                 state->shared->perf_node_start_cycles  = ggml_perf_cycles();
                 state->shared->perf_node_start_time_us = ggml_perf_time_us();
-
+#endif
                 params.nth = n_tasks;
 
                 /* INIT */
@@ -16324,8 +16328,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                     if (GGML_OP_HAS_FINALIZE[node->op]) {
                         params.type = GGML_TASK_FINALIZE;
                         ggml_compute_forward(&params, node);
-                        ggml_graph_compute_perf_stats_node(node, state->shared);
                     }
+#ifdef GGML_PERF
+                    ggml_graph_compute_perf_stats_node(node, state->shared);
+#endif
                 } else {
                     break;
                 }
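Note: the five hunks above apply one pattern: ggml_graph_compute_perf_stats_node and every call to it are now guarded by #ifdef GGML_PERF, so builds without that flag neither define nor reference the instrumentation. A minimal standalone sketch of the same gating pattern, with hypothetical names rather than the real ggml API:

    #include <stdio.h>
    #include <stdint.h>

    /* Toy stand-ins; ggml.c uses ggml_perf_cycles() and the perf_* fields on
       ggml_tensor / ggml_compute_state_shared instead. */
    #ifdef GGML_PERF
    static int64_t g_perf_cycles = 0;

    static void perf_stats_node(int64_t cycles_cur) {
        g_perf_cycles += cycles_cur;      /* accumulate per-node cost */
    }
    #endif

    static void compute_node(int node_id) {
        /* ... the actual work would happen here ... */
    #ifdef GGML_PERF
        perf_stats_node(1);               /* compiled only with -DGGML_PERF */
    #endif
        printf("node %d done\n", node_id);
    }

    int main(void) {
        compute_node(0);
    #ifdef GGML_PERF
        printf("total perf cycles: %lld\n", (long long) g_perf_cycles);
    #endif
        return 0;
    }

Built plainly (cc demo.c) the counters disappear entirely; built with cc -DGGML_PERF demo.c they are included, which is the property the commit restores for the real instrumentation.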
@@ -16864,9 +16870,6 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
 }
 
 void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
-    //assert(cgraph->work == NULL);
-    //assert(cgraph->work_size == 0);
-
     uint64_t size_eval = 0;
 
     // compute size of intermediate results
@@ -17305,9 +17308,6 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
 
     GGML_PRINT("=== GRAPH ===\n");
 
-    GGML_PRINT_DEBUG("n_threads = %d\n", cgraph->n_threads);
-    GGML_PRINT_DEBUG("total work size = %zu bytes\n", cgraph->work_size);
-
     GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes);
     for (int i = 0; i < cgraph->n_nodes; i++) {
         struct ggml_tensor * node = cgraph->nodes[i];
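Note on the last hunk: the two dropped GGML_PRINT_DEBUG calls read cgraph->n_threads and cgraph->work_size, members that presumably no longer exist on struct ggml_cgraph in this tree (upstream ggml moved them into ggml_cplan around this time); the hunk above it drops stale commented-out asserts for the same fields. The calls only surface as compile errors when GGML_DEBUG is enabled, because GGML_PRINT_DEBUG expands to nothing otherwise; a paraphrase of that gating, with a hypothetical local standing in for the removed field:

    #include <stdio.h>

    /* Paraphrase of ggml.c's debug-print gating; GGML_DEBUG defaults to off. */
    #ifndef GGML_DEBUG
    #define GGML_DEBUG 0
    #endif

    #if (GGML_DEBUG >= 1)
    #define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
    #else
    #define GGML_PRINT_DEBUG(...)
    #endif

    int main(void) {
        int n_threads = 4;   /* hypothetical stand-in for the removed cgraph->n_threads */
        /* With GGML_DEBUG off, the macro and its arguments vanish during
           preprocessing, so a stale member access inside the call only breaks
           the build once someone compiles with -DGGML_DEBUG=1. */
        GGML_PRINT_DEBUG("n_threads = %d\n", n_threads);
        return 0;
    }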