From 5884b43a622a88f7f2fddf73f97ae9f50137efdc Mon Sep 17 00:00:00 2001
From: xaedes
Date: Mon, 14 Aug 2023 17:58:49 +0200
Subject: [PATCH] add input tensors as checkpoints so that recursive tensor cloning of gradient checkpointing terminates on input tensors

---
 examples/train-text-from-scratch/train-text-from-scratch.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index d5fde1ca5..48edf3651 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1579,7 +1579,10 @@ struct ggml_tensor * llama_build_train_graphs(
     struct ggml_tensor * cur = t01;
 
     std::vector<struct ggml_tensor *> checkpoints;
-    checkpoints.push_back(cur);
+    checkpoints.push_back(tokens_input);
+    checkpoints.push_back(targets);
+    checkpoints.push_back(t00);
+    checkpoints.push_back(t01);
 
     struct ggml_tensor * kv_scale;
     if (flash_attn) {