From 571dc94da986b6f039ad9a16d5080d72dec186b0 Mon Sep 17 00:00:00 2001 From: xaedes Date: Sat, 16 Sep 2023 20:23:05 +0200 Subject: [PATCH] increase train_samples by used_samples instead of number of batches one batch can contain more than one sample when option "fill_with_next_samples" is used --- common/train.cpp | 6 +++--- examples/finetune/finetune.cpp | 1 - .../train-text-from-scratch/train-text-from-scratch.cpp | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/common/train.cpp b/common/train.cpp index 287fe5d93..9357fab0f 100644 --- a/common/train.cpp +++ b/common/train.cpp @@ -1367,9 +1367,8 @@ void train_opt_callback(void * vdata, int accum_step, float * sched) { const bool save_now = (params->save_every > 0) && (opt->iter - data->last_save_iter >= params->save_every); if (save_now) { int new_iters = opt->iter - data->last_save_iter; - train->train_its += new_iters; - train->train_samples += new_iters * opt->params.n_gradient_accumulation * n_batch; - train->train_tokens += new_iters * opt->params.n_gradient_accumulation * n_batch * n_ctx; + train->train_its += new_iters; + train->train_tokens += new_iters * opt->params.n_gradient_accumulation * n_batch * n_ctx; if (data->save_cb) { data->save_cb(data->save_data, train); @@ -1431,6 +1430,7 @@ void train_opt_callback(void * vdata, int accum_step, float * sched) { params->separate_with_bos, params->fill_with_next_samples); + train->train_samples += used_samples; train->shuffle_next_sample += used_samples; if (train->shuffle_next_sample >= train->shuffle_sample_count) { diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index 5c6fa639c..c1227897c 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -1938,7 +1938,6 @@ int main(int argc, char ** argv) { int new_iters = opt->iter - opt_cb_data.last_save_iter; if (new_iters > 0) { train->train_its += new_iters; - train->train_samples += new_iters * opt->params.n_gradient_accumulation * 
n_batch; train->train_tokens += new_iters * opt->params.n_gradient_accumulation * n_batch * n_tokens; save_train_files(&save_data, train); diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index c54727ec5..88174e064 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -1183,7 +1183,6 @@ int main(int argc, char ** argv) { int new_iters = opt->iter - opt_cb_data.last_save_iter; if (new_iters > 0) { train->train_its += new_iters; - train->train_samples += new_iters * opt->params.n_gradient_accumulation * n_batch; train->train_tokens += new_iters * opt->params.n_gradient_accumulation * n_batch * n_tokens; save_train_files(&save_data, train);