Add input tensors as checkpoints
so that the recursive tensor cloning performed by gradient checkpointing terminates at the input tensors.
This commit is contained in:
parent
b2f1310196
commit
5884b43a62
1 changed file with 4 additions and 1 deletion
|
@@ -1579,7 +1579,10 @@ struct ggml_tensor * llama_build_train_graphs(
|
|||
struct ggml_tensor * cur = t01;
|
||||
|
||||
std::vector<struct ggml_tensor *> checkpoints;
|
||||
checkpoints.push_back(cur);
|
||||
checkpoints.push_back(tokens_input);
|
||||
checkpoints.push_back(targets);
|
||||
checkpoints.push_back(t00);
|
||||
checkpoints.push_back(t01);
|
||||
|
||||
struct ggml_tensor * kv_scale;
|
||||
if (flash_attn) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue