From 0c2c9c7545e687538172478de98b605c3233223a Mon Sep 17 00:00:00 2001
From: xaedes
Date: Wed, 6 Sep 2023 22:45:36 +0200
Subject: [PATCH] fix gradient accumulation bug where the same batch was used
 for each microstep

---
 examples/finetune/finetune.cpp                                | 2 +-
 examples/train-text-from-scratch/train-text-from-scratch.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index ed6bd8793..d7c0a3360 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -2449,7 +2449,7 @@ void opt_callback(void * vdata, int accum_step, float * sched) {
         data->samples_size,
         data->tokens_data,
         data->tokens_size,
-        opt->iter * params->n_gradient_accumulation,
+        opt->iter*params->n_gradient_accumulation + accum_step,
         data->tokens_input,
         data->target_probs);
 
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 0a486f553..bfe788a79 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1846,7 +1846,7 @@ void opt_callback(void * vdata, int accum_step, float * sched) {
         data->samples_size,
         data->tokens_data,
         data->tokens_size,
-        opt->iter * params->n_gradient_accumulation,
+        opt->iter*params->n_gradient_accumulation + accum_step,
         data->tokens_input,
         data->target_logits,
         data->target_probs);
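
Note: below is a minimal standalone sketch of the index arithmetic this patch changes, not the actual training code; the loop bounds, the example value of n_gradient_accumulation, and the printf are illustrative only. With the old expression, every microstep of an optimizer iteration computed the same example offset, so all n_gradient_accumulation microsteps loaded the same batch; adding accum_step gives each microstep a distinct offset.

#include <cstdio>

int main() {
    const int n_gradient_accumulation = 4; // illustrative value, not taken from the patch

    for (int iter = 0; iter < 2; ++iter) {
        for (int accum_step = 0; accum_step < n_gradient_accumulation; ++accum_step) {
            // old: identical for every microstep within one optimizer iteration
            int old_offset = iter * n_gradient_accumulation;
            // new: advances by one per microstep, so each microstep reads a different batch
            int new_offset = iter * n_gradient_accumulation + accum_step;
            printf("iter=%d accum_step=%d old_offset=%d new_offset=%d\n",
                   iter, accum_step, old_offset, new_offset);
        }
    }
    return 0;
}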