Add input tensors as checkpoints
so that the recursive tensor cloning performed by gradient checkpointing terminates at the input tensors
This commit is contained in:
parent
b2f1310196
commit
5884b43a62
1 changed file with 4 additions and 1 deletion
|
@@ -1579,7 +1579,10 @@ struct ggml_tensor * llama_build_train_graphs(
|
||||||
struct ggml_tensor * cur = t01;
|
struct ggml_tensor * cur = t01;
|
||||||
|
|
||||||
std::vector<struct ggml_tensor *> checkpoints;
|
std::vector<struct ggml_tensor *> checkpoints;
|
||||||
checkpoints.push_back(cur);
|
checkpoints.push_back(tokens_input);
|
||||||
|
checkpoints.push_back(targets);
|
||||||
|
checkpoints.push_back(t00);
|
||||||
|
checkpoints.push_back(t01);
|
||||||
|
|
||||||
struct ggml_tensor * kv_scale;
|
struct ggml_tensor * kv_scale;
|
||||||
if (flash_attn) {
|
if (flash_attn) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue