From 0c2c9c7545e687538172478de98b605c3233223a Mon Sep 17 00:00:00 2001
From: xaedes
Date: Wed, 6 Sep 2023 22:45:36 +0200
Subject: [PATCH] fix gradient accumulation bug where the same batch was used
 for each microstep

---
 examples/finetune/finetune.cpp                                | 2 +-
 examples/train-text-from-scratch/train-text-from-scratch.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index ed6bd8793..d7c0a3360 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -2449,7 +2449,7 @@ void opt_callback(void * vdata, int accum_step, float * sched) {
         data->samples_size,
         data->tokens_data,
         data->tokens_size,
-        opt->iter * params->n_gradient_accumulation,
+        opt->iter*params->n_gradient_accumulation + accum_step,
         data->tokens_input,
         data->target_probs);
 
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 0a486f553..bfe788a79 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1846,7 +1846,7 @@ void opt_callback(void * vdata, int accum_step, float * sched) {
         data->samples_size,
         data->tokens_data,
         data->tokens_size,
-        opt->iter * params->n_gradient_accumulation,
+        opt->iter*params->n_gradient_accumulation + accum_step,
         data->tokens_input,
         data->target_logits,
         data->target_probs);
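
Note: below is a minimal standalone sketch of the index arithmetic this patch changes, not the actual training code; the loop bounds, the example value of n_gradient_accumulation, and the printf are illustrative only. With the old expression, every microstep of an optimizer iteration computed the same example offset, so all n_gradient_accumulation microsteps loaded the same batch; adding accum_step gives each microstep a distinct offset.

#include <cstdio>

int main() {
    const int n_gradient_accumulation = 4; // illustrative value, not taken from the patch

    for (int iter = 0; iter < 2; ++iter) {
        for (int accum_step = 0; accum_step < n_gradient_accumulation; ++accum_step) {
            // old: identical for every microstep within one optimizer iteration
            int old_offset = iter * n_gradient_accumulation;
            // new: advances by one per microstep, so each microstep reads a different batch
            int new_offset = iter * n_gradient_accumulation + accum_step;
            printf("iter=%d accum_step=%d old_offset=%d new_offset=%d\n",
                   iter, accum_step, old_offset, new_offset);
        }
    }
    return 0;
}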