add input tensors as checkpoints

so that the recursive tensor cloning performed by gradient checkpointing terminates at the input tensors
2023-08-14 17:58:49 +02:00 · 2023-08-14 17:58:49 +02:00 · 5884b43a62
commit 5884b43a62
parent b2f1310196
1 changed file with 4 additions and 1 deletion
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1579,7 +1579,10 @@ struct ggml_tensor * llama_build_train_graphs(
    struct ggml_tensor * cur = t01;

    std::vector<struct ggml_tensor *> checkpoints;
-    checkpoints.push_back(cur);
+    checkpoints.push_back(tokens_input);
+    checkpoints.push_back(targets);
+    checkpoints.push_back(t00);
+    checkpoints.push_back(t01);

    struct ggml_tensor * kv_scale;
    if (flash_attn) {