diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index ff6167da8..f933c0164 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -953,7 +953,7 @@ struct ggml_tensor * forward_batch_wo_cache(
     const int N = n_tokens;
 
     const auto & hparams = model->hparams;
-    const int n_ctx   = hparams.n_ctx;
+    //const int n_ctx   = hparams.n_ctx;
     const int n_vocab = hparams.n_vocab;
     const int n_embd  = hparams.n_embd;
     const int n_layer = hparams.n_layer;
@@ -1181,7 +1181,7 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn(
     const int N = n_tokens;
 
     const auto & hparams = model->hparams;
-    const int n_ctx   = hparams.n_ctx;
+    //const int n_ctx   = hparams.n_ctx;
     const int n_vocab = hparams.n_vocab;
     const int n_embd  = hparams.n_embd;
     const int n_layer = hparams.n_layer;
@@ -1368,7 +1368,7 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn_train(
     gf->work = NULL;
 
     const auto & hparams = model->hparams;
-    const int n_ctx   = hparams.n_ctx;
+    //const int n_ctx   = hparams.n_ctx;
     const int n_vocab = hparams.n_vocab;
     const int n_embd  = hparams.n_embd;
     const int n_layer = hparams.n_layer;
@@ -1894,7 +1894,7 @@ void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {
 
 void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens) {
     for (int i1=0; i1<tokens->ne[1]; ++i1) {
-        int num_newline = 0;
+        //int num_newline = 0;
         for (int i0=0; i0<tokens->ne[0]; ++i0) {
             int token = get_i32_2d(tokens, i0, i1);
             print_token(ctx, token);
@@ -1920,7 +1920,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
     int n_tokens = tokens_input->ne[0];
     int n_vocab  = target_logits->ne[0];
 
-    int sample = train_samples[example_id % n_train_samples];
+    size_t sample = train_samples[example_id % n_train_samples];
     GGML_ASSERT(sample+n_tokens-1 < n_train_data);
 
     ggml_set_f32(target_logits, -1.0f/n_vocab);
@@ -1936,7 +1936,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
     }
 }
 
-void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
+void get_example_targets_batch(struct llama_context * /*lctx*/, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     GGML_ASSERT(tokens_input->n_dims  == 2);
     GGML_ASSERT(target_logits->n_dims == 3);
     GGML_ASSERT(target_probs->n_dims  == 3);
@@ -1953,7 +1953,7 @@ void get_example_targets_batch(struct llama_context * lctx, const int * train_sa
     ggml_set_f32(target_probs, 0.0f);
     for (int k=0; k<n_batch; ++k) {
         // printf("%s: batch %d\n", __func__, k);
-        int sample = train_samples[(example_id*n_batch + k) % n_train_samples];
+        size_t sample = train_samples[(example_id*n_batch + k) % n_train_samples];
         GGML_ASSERT(sample+n_tokens-1 < n_train_data);
 
         set_i32_2d(tokens_input, 0, k, llama_token_bos());
@@ -2264,7 +2264,7 @@ llama_token sample(struct my_llama_sampler * sampler, float * logits, const llam
 }
 
 void set_logits_masked(struct ggml_tensor * logits, std::vector<bool>& mask, float value) {
-    GGML_ASSERT(logits->ne[0] == mask.size());
+    GGML_ASSERT(logits->ne[0] == (int64_t) mask.size());
     for (int i2 = 0; i2 < logits->ne[2]; ++i2) {
         for (int i1 = 0; i1 < logits->ne[1]; ++i1) {
             for (int i0 = 0; i0 < logits->ne[0]; ++i0) {
@@ -2301,7 +2301,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
 }
 
 void read_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
-    uint32_t nd = file->read_u32();
+    int32_t nd = file->read_u32();
     GGML_ASSERT(nd == tensor->n_dims);
 
     uint32_t name_len = file->read_u32();
@@ -3003,7 +3003,7 @@ int main(int argc, char ** argv) {
     if (tokenize_file(lctx, params.fn_train_data, train_tokens) < 0) {
         fprintf(stderr, "%s: failed to tokenize file '%s'\n", __func__, params.fn_train_data);
     }
-    printf("%s: number of training tokens: %d\n", __func__, train_tokens.size());
+    printf("%s: number of training tokens: %d\n", __func__, (int) train_tokens.size());
 
     struct my_llama_model model;
     model.hparams.n_vocab = llama_n_vocab(lctx);
@@ -3020,7 +3020,7 @@ int main(int argc, char ** argv) {
     std::vector<bool> token_notavail;
     token_noccurs.resize(model.hparams.n_vocab, 0);
     token_notavail.resize(model.hparams.n_vocab, true);
-    for (int i=0; i<train_tokens.size(); ++i) {
+    for (int i = 0; i < (int) train_tokens.size(); ++i) {
         ++token_noccurs[train_tokens[i]];
         token_notavail[train_tokens[i]] = false;
     }
@@ -3028,7 +3028,7 @@ int main(int argc, char ** argv) {
     std::vector<float> token_freq;
     token_freq.resize(model.hparams.n_vocab, 0);
     int n_unique_tokens = 0;
-    for (int i=0; i<token_noccurs.size(); ++i) {
+    for (int i = 0; i < (int) token_noccurs.size(); ++i) {
         token_freq[i] = (float) token_noccurs[i] / (float) train_tokens.size();
         n_unique_tokens += (token_noccurs[i] > 0) ? 1 : 0;
     }
@@ -3104,26 +3104,26 @@ int main(int argc, char ** argv) {
     uint8_t * compute_buf_1 = new uint8_t[size_buf_1];
     uint8_t * compute_buf_2 = new uint8_t[size_buf_2];
 
-    GGML_ASSERT(train_tokens.size() > n_tokens);;
+    GGML_ASSERT(n_tokens < (int) train_tokens.size());
     std::vector<int> train_samples;
     train_samples.push_back(0);
-    for (int i=1; i<train_tokens.size()-n_tokens; ++i) {
+    for (int i = 1; i < (int) train_tokens.size()-n_tokens; ++i) {
         if (!params.samples_start_after_nl || (train_tokens[i-1] == llama_token_nl())) {
             train_samples.push_back(i);
         }
     }
     shuffle_ints(train_samples.data(), train_samples.data() + train_samples.size());
-    for (int i=0; i<train_samples.size(); ++i) {
+    for (int i = 0; i < (int) train_samples.size(); ++i) {
         GGML_ASSERT(train_samples[i]+n_tokens-1 < train_tokens.size());
     }
 
     printf("%s: begin training\n", __func__);
 
-    for (int ex=0; ex<params.n_examples; ++ex) {
-        if (ex*n_batch >= train_samples.size()) {
+    for (int ex = 0; ex < params.n_examples; ++ex) {
+        if (ex*n_batch >= (int) train_samples.size()) {
             shuffle_ints(train_samples.data(), train_samples.data() + train_samples.size());
-            for (int i=0; i<train_samples.size(); ++i) {
-                GGML_ASSERT(train_samples[i]+n_tokens-1 < train_tokens.size());
+            for (int i = 0; i < (int) train_samples.size(); ++i) {
+                GGML_ASSERT(train_samples[i]+n_tokens-1 < (int) train_tokens.size());
             }
         }
 
diff --git a/ggml.c b/ggml.c
--- a/ggml.c
+++ b/ggml.c
@@ -10522,7 +10522,7 @@ static void ggml_compute_forward_out_prod_f32(
     const int64_t ne03 = src0->ne[3];
 
     const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
+    //const int64_t ne11 = src1->ne[1];
     const int64_t ne12 = src1->ne[2];
     const int64_t ne13 = src1->ne[3];
 
@@ -10587,11 +10587,10 @@ static void ggml_compute_forward_out_prod_f32(
         const int64_t i02 = i2;
         const int64_t i03 = i3;
 
-        const int64_t i10 = i1;
+        //const int64_t i10 = i1;
         const int64_t i12 = i2;
         const int64_t i13 = i3;
 
-
         for (int64_t i01 = 0; i01 < ne01; ++i01) {
             const int64_t i11 = i01;
 
@@ -13956,8 +13955,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
         return;
     }
 
-    const float eps = 1e-9f;
-
+    const double eps = 1e-9;
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
 
@@ -14002,7 +14000,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             // sum = 1.0/sum;
         }
         // avoid log(0) by rescaling from [0..1] to [eps..1]
-        sum = (1.0f - eps) / sum;
+        sum = (1.0 - eps) / sum;
         ggml_vec_scale_f32(nc, st, sum);
         ggml_vec_add1_f32(nc, st, st, eps);
         ggml_vec_log_f32(nc, st, st);
@@ -14054,8 +14052,6 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
     const int64_t ith = params->ith;
     const int64_t nth = params->nth;
 
-    float * sums = (float *) params->wdata;
-
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14090,6 +14086,8 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
 #endif
         // step by step explanation:
         {
+            //float * sums = (float *) params->wdata;
+
             // forward pass with annotated gradients from backward pass
             // (built by going in reverse operation order, adding to gradients of current operation args)
             // st0 = exp(s0-max(s0))      grad[st0] = grad[st1]*(1.0 - eps)/sum
@@ -14162,10 +14160,10 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
             float dot_st1_dst1 = 0;
             ggml_vec_scale_f32(nc, sm, sum);
             ggml_vec_cpy_f32  (nc, ds0, sm);
-            ggml_vec_scale_f32(nc, ds0, (1.0 - eps));
+            ggml_vec_scale_f32(nc, ds0, (1.0f - eps));
             ggml_vec_add1_f32 (nc, ds0, ds0, eps);
             ggml_vec_div_f32  (nc, ds0, s1, ds0);
-            ggml_vec_scale_f32(nc, ds0, -(1.0 - eps)*d[0]);
+            ggml_vec_scale_f32(nc, ds0, -(1.0f - eps)*d[0]);
             ggml_vec_dot_f32  (nc, &dot_st1_dst1, sm, ds0);
             ggml_vec_acc1_f32 (nc, ds0, -dot_st1_dst1);
             ggml_vec_mul_f32  (nc, ds0, ds0, sm);
diff --git a/llama.h b/llama.h
index 3947cf3e2..4694c9c85 100644
--- a/llama.h
+++ b/llama.h
@@ -193,9 +193,9 @@ extern "C" {
     // Returns number of results.
     LLAMA_API int llama_get_vocab(
             const struct llama_context * ctx,
-                      const char * * strings,
-                      float * scores,
-                      int capacity);
+                          const char * * strings,
+                                 float * scores,
+                                   int   capacity);
 
     // Token logits obtained from the last call to llama_eval()
     // The logits for the last token are stored in the last row