diff --git a/ggml.c b/ggml.c
index c137ae658..174b5d3f5 100644
--- a/ggml.c
+++ b/ggml.c
@@ -4411,8 +4411,8 @@ void ggml_free(struct ggml_context * ctx) {
             g_state.contexts[i].used = false;

-            GGML_PRINT_DEBUG("%s: context %d with %d objects has been freed. memory used = %zu\n",
-                    __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
+            GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
+                    __func__, i, ggml_used_mem(ctx));

             if (ctx->mem_buffer_owned) {
                 GGML_ALIGNED_FREE(ctx->mem_buffer);
@@ -16249,6 +16249,7 @@ struct ggml_compute_state {
     struct ggml_compute_state_shared * shared;
 };

+#ifdef GGML_PERF
 static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
     int64_t cycles_cur  = ggml_perf_cycles()  - st->perf_node_start_cycles;
     int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;
@@ -16257,6 +16258,7 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const
     node->perf_cycles  += cycles_cur;
     node->perf_time_us += time_us_cur;
 }
+#endif

 static thread_ret_t ggml_graph_compute_thread(void * data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
@@ -16293,8 +16295,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 if (GGML_OP_HAS_FINALIZE[node->op]) {
                     params.nth = n_tasks_arr[node_n];
                     ggml_compute_forward(&params, node);
-                    ggml_graph_compute_perf_stats_node(node, state->shared);
                 }
+#ifdef GGML_PERF
+                ggml_graph_compute_perf_stats_node(node, state->shared);
+#endif
             }

             // distribute new work or execute it direct if 1T
@@ -16303,10 +16307,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 struct ggml_tensor * node = cgraph->nodes[node_n];

                 const int n_tasks = n_tasks_arr[node_n];
-
+#ifdef GGML_PERF
                 state->shared->perf_node_start_cycles  = ggml_perf_cycles();
                 state->shared->perf_node_start_time_us = ggml_perf_time_us();
-
+#endif
                 params.nth = n_tasks;

                 /* INIT */
@@ -16324,8 +16328,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                     if (GGML_OP_HAS_FINALIZE[node->op]) {
                         params.type = GGML_TASK_FINALIZE;
                         ggml_compute_forward(&params, node);
-                        ggml_graph_compute_perf_stats_node(node, state->shared);
                     }
+#ifdef GGML_PERF
+                    ggml_graph_compute_perf_stats_node(node, state->shared);
+#endif
                 } else {
                     break;
                 }
@@ -16864,9 +16870,6 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
 }

 void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
-    //assert(cgraph->work      == NULL);
-    //assert(cgraph->work_size == 0);
-
     uint64_t size_eval = 0;

     // compute size of intermediate results
@@ -17305,9 +17308,6 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {

     GGML_PRINT("=== GRAPH ===\n");

-    GGML_PRINT_DEBUG("n_threads       = %d\n",        cgraph->n_threads);
-    GGML_PRINT_DEBUG("total work size = %zu bytes\n", cgraph->work_size);
-
     GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes);
     for (int i = 0; i < cgraph->n_nodes; i++) {
         struct ggml_tensor * node = cgraph->nodes[i];
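
Note on the ggml_free() hunk: ggml_used_mem() is the public accessor for exactly the quantity the old debug print computed inline (objects_end->offs + objects_end->size), and unlike the inline expression it returns 0 for a context that holds no objects instead of dereferencing a NULL objects_end. Below is a minimal sketch of the accessor from the API side; it is not part of the patch, the arena size and tensor shapes are arbitrary, and GGML_PERF (which after this patch also gates the per-node perf counters) is a plain macro that would typically be enabled with -DGGML_PERF at compile time.

    // build (roughly): cc -o used_mem used_mem.c ggml.c -lm -lpthread
    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params ip = {
            /* .mem_size   = */ 16*1024*1024,  // 16 MiB arena (arbitrary)
            /* .mem_buffer = */ NULL,          // let ggml allocate and own the buffer
            /* .no_alloc   = */ false,
        };

        struct ggml_context * ctx = ggml_init(ip);

        printf("used before: %zu bytes\n", ggml_used_mem(ctx)); // 0: no objects yet

        // each object allocated in the context advances objects_end
        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
        struct ggml_tensor * c = ggml_add(ctx, a, b);
        (void) c;

        // offs + size of the last object, i.e. what the removed expression spelled out
        printf("used after:  %zu bytes\n", ggml_used_mem(ctx));

        // when debug logging is enabled, this now reports usage via the same helper
        ggml_free(ctx);
        return 0;
    }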