From 38f4438c32def72e7a0fd42f9caba9df80a5cc32 Mon Sep 17 00:00:00 2001
From: xaedes <xaedes@gmail.com>
Date: Mon, 14 Aug 2023 18:07:16 +0200
Subject: [PATCH] make sure some tensors are not reallocated by inserting new
 temporary nodes depending on them:

output and parameter gradient tensors need to be available at the end of the graph execution

parameter gradient tensors also need to be available before the graph execution because they are set to zero before each optimizer iteration

checkpoint tensors are allocated all together to reduce memory allocator fragmentation

afterwards, in addition to the temporary nodes, we also need to reset the temporary leafs
---
 .../train-text-from-scratch.cpp               | 31 ++++++++++++++-----
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 88a1c3a50..058325059 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1656,19 +1656,36 @@ struct ggml_tensor * llama_build_train_graphs(
     }
 
     if (alloc) {
-        // make sure t35 and t36 are not reallocated by inserting new temporary node depending on them
-        struct ggml_tensor * dep = ggml_scale_inplace(ctx, t35, t36);
+        // make sure some tensors are not reallocated by inserting new temporary nodes depending on them
+        int n_leafs_before = gb->n_leafs;
         int n_nodes_before = gb->n_nodes;
-        ggml_build_forward_expand(gb, dep);
+        struct ggml_tensor * one = ggml_new_f32(ctx, 1.0f);
+        // output tensors
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t35, one));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36, one));
+        // gradient tensors (will be set to zero by ggml_graph_reset)
+        for (int i = 0; i < gf->n_nodes; ++i) {
+            if (!gf->grads[i]) continue;
+            ggml_allocr_alloc(alloc, gf->grads[i]);
+            ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, gf->grads[i], one));
+        }
+        for (int i = 0; i < checkpoints.size(); ++i) {
+            ggml_allocr_alloc(alloc, checkpoints[i]);
+        }
 
+        int n_leafs_after = gb->n_leafs;
         int n_nodes_after = gb->n_nodes;
-        GGML_ASSERT(n_nodes_after == n_nodes_before + 1);
 
-        ggml_allocr_reset(alloc);
         ggml_allocr_alloc_graph(alloc, gb);
 
-        // remove the additional node that was insert
-        gb->nodes[n_nodes_after-1] = NULL;
+        // remove the additional nodes and leafs
+        for (int i = n_leafs_before; i < gb->n_leafs; ++i) {
+            gb->leafs[i] = NULL;
+        }
+        for (int i = n_nodes_before; i < gb->n_nodes; ++i) {
+            gb->nodes[i] = NULL;
+        }
+        gb->n_leafs = n_leafs_before;
         gb->n_nodes = n_nodes_before;
     }