diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index ff6167da8..f933c0164 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -953,7 +953,7 @@ struct ggml_tensor * forward_batch_wo_cache(
     const int N = n_tokens;
 
     const auto & hparams = model->hparams;
-    const int n_ctx   = hparams.n_ctx;
+    //const int n_ctx   = hparams.n_ctx;
     const int n_vocab = hparams.n_vocab;
     const int n_embd  = hparams.n_embd;
     const int n_layer = hparams.n_layer;
@@ -1181,7 +1181,7 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn(
     const int N = n_tokens;
 
     const auto & hparams = model->hparams;
-    const int n_ctx   = hparams.n_ctx;
+    //const int n_ctx   = hparams.n_ctx;
     const int n_vocab = hparams.n_vocab;
     const int n_embd  = hparams.n_embd;
     const int n_layer = hparams.n_layer;
@@ -1368,7 +1368,7 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn_train(
     gf->work = NULL;
 
     const auto & hparams = model->hparams;
-    const int n_ctx   = hparams.n_ctx;
+    //const int n_ctx   = hparams.n_ctx;
     const int n_vocab = hparams.n_vocab;
     const int n_embd  = hparams.n_embd;
     const int n_layer = hparams.n_layer;
@@ -1894,7 +1894,7 @@ void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {
 
 void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens) {
     for (int i1=0; i1<tokens->ne[1]; ++i1) {
-        int num_newline = 0;
+        //int num_newline = 0;
         for (int i0=0; i0<tokens->ne[0]; ++i0) {
             int token = get_i32_2d(tokens, i0, i1);
             print_token(ctx, token);
@@ -1920,7 +1920,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
     int n_tokens = tokens_input->ne[0];
     int n_vocab  = target_logits->ne[0];
 
-    int sample = train_samples[example_id % n_train_samples];
+    size_t sample = train_samples[example_id % n_train_samples];
     GGML_ASSERT(sample+n_tokens-1 < n_train_data);
 
     ggml_set_f32(target_logits, -1.0f/n_vocab);
@@ -1936,7 +1936,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
     }
 }
 
-void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
+void get_example_targets_batch(struct llama_context * /*lctx*/, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     GGML_ASSERT(tokens_input->n_dims  == 2);
     GGML_ASSERT(target_logits->n_dims == 3);
     GGML_ASSERT(target_probs->n_dims  == 3);
@@ -1953,7 +1953,7 @@ void get_example_targets_batch(struct llama_context * lctx, const int * train_sa
     ggml_set_f32(target_probs, 0.0f);
     for (int k=0; k<n_batch; ++k) {
         // printf("%s: batch %d\n", __func__, k);
-        int sample = train_samples[(example_id*n_batch + k) % n_train_samples];
+        size_t sample = train_samples[(example_id*n_batch + k) % n_train_samples];
         GGML_ASSERT(sample+n_tokens-1 < n_train_data);
 
         set_i32_2d(tokens_input, 0, k, llama_token_bos());
@@ -2264,7 +2264,7 @@ llama_token sample(struct my_llama_sampler * sampler, float * logits, const llam
 }
 
 void set_logits_masked(struct ggml_tensor * logits, std::vector<bool>& mask, float value) {
-    GGML_ASSERT(logits->ne[0] == mask.size());
+    GGML_ASSERT(logits->ne[0] == (int64_t) mask.size());
     for (int i2 = 0; i2 < logits->ne[2]; ++i2) {
         for (int i1 = 0; i1 < logits->ne[1]; ++i1) {
             for (int i0 = 0; i0 < logits->ne[0]; ++i0) {
@@ -2301,7 +2301,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
 }
 
 void read_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
-    uint32_t nd = file->read_u32();
+    int32_t nd = file->read_u32();
     GGML_ASSERT(nd == tensor->n_dims);
 
     uint32_t name_len = file->read_u32();
@@ -3003,7 +3003,7 @@ int main(int argc, char ** argv) {
     if (tokenize_file(lctx, params.fn_train_data, train_tokens) < 0) {
         fprintf(stderr, "%s: failed to tokenize file '%s'\n", __func__, params.fn_train_data);
     }
-    printf("%s: number of training tokens: %d\n", __func__, train_tokens.size());
+    printf("%s: number of training tokens: %d\n", __func__, (int) train_tokens.size());
 
     struct my_llama_model model;
     model.hparams.n_vocab = llama_n_vocab(lctx);
@@ -3020,7 +3020,7 @@ int main(int argc, char ** argv) {
     std::vector<bool> token_notavail;
     token_noccurs.resize(model.hparams.n_vocab, 0);
     token_notavail.resize(model.hparams.n_vocab, true);
-    for (int i=0; i<train_tokens.size(); ++i) {
+    for (int i = 0; i < (int) train_tokens.size(); ++i) {
         ++token_noccurs[train_tokens[i]];
         token_notavail[train_tokens[i]] = false;
     }
@@ -3028,7 +3028,7 @@ int main(int argc, char ** argv) {
     std::vector<float> token_freq;
     token_freq.resize(model.hparams.n_vocab, 0);
     int n_unique_tokens = 0;
-    for (int i=0; i<token_noccurs.size(); ++i) {
+    for (int i = 0; i < (int) token_noccurs.size(); ++i) {
         token_freq[i] = (float) token_noccurs[i] / (float) train_tokens.size();
         n_unique_tokens += (token_noccurs[i] > 0) ? 1 : 0;
     }
@@ -3104,26 +3104,26 @@ int main(int argc, char ** argv) {
     uint8_t * compute_buf_1 = new uint8_t[size_buf_1];
     uint8_t * compute_buf_2 = new uint8_t[size_buf_2];
 
-    GGML_ASSERT(train_tokens.size() > n_tokens);;
+    GGML_ASSERT(n_tokens < (int) train_tokens.size());
     std::vector<int> train_samples;
     train_samples.push_back(0);
-    for (int i=1; i<train_tokens.size()-n_tokens; ++i) {
+    for (int i = 1; i < (int) train_tokens.size()-n_tokens; ++i) {
         if (!params.samples_start_after_nl || (train_tokens[i-1] == llama_token_nl())) {
             train_samples.push_back(i);
         }
     }
     shuffle_ints(train_samples.data(), train_samples.data() + train_samples.size());
-    for (int i=0; i<train_samples.size(); ++i) {
+    for (int i = 0; i < (int) train_samples.size(); ++i) {
         GGML_ASSERT(train_samples[i]+n_tokens-1 < train_tokens.size());
     }
 
     printf("%s: begin training\n", __func__);
 
-    for (int ex=0; ex<params.n_examples; ++ex) {
-        if (ex*n_batch >= train_samples.size()) {
+    for (int ex = 0; ex < params.n_examples; ++ex) {
+        if (ex*n_batch >= (int) train_samples.size()) {
             shuffle_ints(train_samples.data(), train_samples.data() + train_samples.size());
-            for (int i=0; i<train_samples.size(); ++i) {
-                GGML_ASSERT(train_samples[i]+n_tokens-1 < train_tokens.size());
+            for (int i = 0; i < (int) train_samples.size(); ++i) {
+                GGML_ASSERT(train_samples[i]+n_tokens-1 < (int) train_tokens.size());
             }
         }
 
diff --git a/ggml.c b/ggml.c
--- a/ggml.c
+++ b/ggml.c
@@ -10522,7 +10522,7 @@ static void ggml_compute_forward_out_prod_f32(
     const int64_t ne03 = src0->ne[3];
 
     const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
+    //const int64_t ne11 = src1->ne[1];
     const int64_t ne12 = src1->ne[2];
     const int64_t ne13 = src1->ne[3];
 
@@ -10587,11 +10587,10 @@ static void ggml_compute_forward_out_prod_f32(
         const int64_t i02 = i2;
         const int64_t i03 = i3;
 
-        const int64_t i10 = i1;
+        //const int64_t i10 = i1;
         const int64_t i12 = i2;
         const int64_t i13 = i3;
 
-
         for (int64_t i01 = 0; i01 < ne01; ++i01) {
             const int64_t i11 = i01;
 
@@ -13956,8 +13955,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
         return;
     }
 
-    const float eps = 1e-9f;
-
+    const double eps = 1e-9;
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
 
@@ -14002,7 +14000,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             // sum = 1.0/sum;
         }
         // avoid log(0) by rescaling from [0..1] to [eps..1]
-        sum = (1.0f - eps) / sum;
+        sum = (1.0 - eps) / sum;
         ggml_vec_scale_f32(nc, st, sum);
         ggml_vec_add1_f32(nc, st, st, eps);
         ggml_vec_log_f32(nc, st, st);
@@ -14054,8 +14052,6 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
     const int64_t ith = params->ith;
     const int64_t nth = params->nth;
 
-    float * sums = (float *) params->wdata;
-
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14090,6 +14086,8 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
 #endif
         // step by step explanation:
         {
+            //float * sums = (float *) params->wdata;
+
             // forward pass with annotated gradients from backward pass
             // (built by going in reverse operation order, adding to gradients of current operation args)
             // st0 = exp(s0-max(s0))      grad[st0] = grad[st1]*(1.0 - eps)/sum
@@ -14162,10 +14160,10 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
             float dot_st1_dst1 = 0;
             ggml_vec_scale_f32(nc, sm, sum);
             ggml_vec_cpy_f32  (nc, ds0, sm);
-            ggml_vec_scale_f32(nc, ds0, (1.0 - eps));
+            ggml_vec_scale_f32(nc, ds0, (1.0f - eps));
             ggml_vec_add1_f32 (nc, ds0, ds0, eps);
             ggml_vec_div_f32  (nc, ds0, s1, ds0);
-            ggml_vec_scale_f32(nc, ds0, -(1.0 - eps)*d[0]);
+            ggml_vec_scale_f32(nc, ds0, -(1.0f - eps)*d[0]);
             ggml_vec_dot_f32  (nc, &dot_st1_dst1, sm, ds0);
             ggml_vec_acc1_f32 (nc, ds0, -dot_st1_dst1);
             ggml_vec_mul_f32  (nc, ds0, ds0, sm);
diff --git a/llama.h b/llama.h
index 3947cf3e2..4694c9c85 100644
--- a/llama.h
+++ b/llama.h
@@ -193,9 +193,9 @@ extern "C" {
     // Returns number of results.
     LLAMA_API int llama_get_vocab(
             const struct llama_context * ctx,
-                      const char * * strings,
-                      float * scores,
-                      int capacity);
+                          const char * * strings,
+                                 float * scores,
+                                   int   capacity);
 
     // Token logits obtained from the last call to llama_eval()
     // The logits for the last token are stored in the last row