From 93535a460a6850f639e81151f955d0799244c5de Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 28 Aug 2023 22:26:10 +0300
Subject: [PATCH] train : fix compile warnings

---
 common/common.cpp                                |  5 +--
 .../convert-llama2c-to-ggml.cpp                  |  1 -
 .../train-text-from-scratch.cpp                  | 31 ++++++++++---------
 ggml.c                                           | 16 +++++-----
 llama.cpp                                        |  9 +++---
 5 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 4a0d43c13..90fe2e84e 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include <cinttypes>

 #if defined(__APPLE__) && defined(__MACH__)
 #include
@@ -938,8 +939,8 @@ std::string get_sortable_timestamp() {

     const int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
         current_time.time_since_epoch() % 1000000000).count();
-    char timestamp_ns[10];
-    snprintf(timestamp_ns, 11, "%09ld", ns);
+    char timestamp_ns[11];
+    snprintf(timestamp_ns, 11, "%09" PRId64, ns);

     return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
 }
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 51d90ea6a..e9e070b1f 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -681,7 +681,6 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod

     // for rms-att-weight
     int row_length = model->hparams.n_embd;
-    const auto & hparams = model->hparams;
     int n_ff = model->hparams.n_ff;

     for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index c9bba95c7..6fe85d419 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -314,15 +314,13 @@ void init_model(struct my_llama_model * model) {
     model->train_samples = 0;
     model->train_tokens = 0;

-    const char * arch = "llama";
-
     std::vector<char> tn_buf;
     tn_buf.resize(GGML_MAX_NAME);
-    auto tn = [arch, &tn_buf](const char * key) -> const char * {
+    auto tn = [&tn_buf](const char * key) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", key);
         return tn_buf.data();
     };
-    auto tni = [arch, &tn_buf](const char * key, int bid) -> const char * {
+    auto tni = [&tn_buf](const char * key, int bid) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), key, bid);
         std::string s = tn_buf.data();
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", s.c_str());
@@ -470,7 +468,7 @@ static size_t hash_find(void * hash_table[], void * p) {
 }

 static bool hash_insert(void * hash_table[], void * p) {
-    size_t h = hash(p);
+    //size_t h = hash(p);
     size_t i = hash_find(hash_table, p);

     GGML_ASSERT(i < GGML_GRAPH_HASHTABLE_SIZE); // assert that not full
@@ -494,7 +492,7 @@ struct hash_map {
     void * keys[GGML_GRAPH_HASHTABLE_SIZE];
     void * vals[GGML_GRAPH_HASHTABLE_SIZE];
 };
-static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
+//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);

 struct hash_map * new_hash_map() {
     struct hash_map * result = new struct hash_map;
@@ -677,7 +675,6 @@ struct ggml_tensor * llama_build_train_graphs(
     const float f_norm_rms_eps  = hparams.f_norm_rms_eps;
     const float rope_freq_base  = hparams.rope_freq_base;
     const float rope_freq_scale = hparams.rope_freq_scale;
-    const int rope_mode = 0;

     auto set_name = [](struct ggml_tensor * t, const char * n) {
         ggml_set_name(t, n);
@@ -687,8 +684,12 @@ struct ggml_tensor * llama_build_train_graphs(
     };

     // rope has so much parameters that we make a custom function for it
-    auto rope = [ctx, n_past, n_rot, rope_mode, n_ctx, rope_freq_base, rope_freq_scale]
+    auto rope = [ctx, n_rot, n_ctx, rope_freq_base, rope_freq_scale]
                 (struct ggml_tensor * t) -> struct ggml_tensor * {
+        // not capturing these, to silence warnings
+        const int n_past = 0;
+        const int rope_mode = 0;
+
         return ggml_rope_custom(ctx,
             t, n_past, n_rot, rope_mode, n_ctx,
             rope_freq_base, rope_freq_scale);
@@ -803,14 +804,14 @@ struct ggml_tensor * llama_build_train_graphs(
         }
         // allocating checkpoints in one block to reduce memory fragmentation
         // note: they will be freed in reverse order
-        for (int i = 0; i < checkpoints.size(); ++i) {
+        for (int i = 0; i < (int) checkpoints.size(); ++i) {
             if (checkpoints[i]->data == NULL && !ggml_is_view(checkpoints[i])) {
                 ggml_allocr_alloc(alloc, checkpoints[i]);
             }
         }

-        int n_leafs_after = gb->n_leafs;
-        int n_nodes_after = gb->n_nodes;
+        //int n_leafs_after = gb->n_leafs;
+        //int n_nodes_after = gb->n_nodes;

         ggml_allocr_alloc_graph(alloc, gb);

@@ -1061,6 +1062,8 @@ bool are_same_layout(struct ggml_tensor * a, struct ggml_tensor * b) {
     GGML_ASSERT(a->type == b->type);
     GGML_ASSERT(ggml_are_same_shape(a, b));
     GGML_ASSERT(ggml_is_contiguous(a) && ggml_is_contiguous(b));
+
+    return true;
 }

 void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) {
@@ -1217,11 +1220,11 @@ void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_g

     std::vector<char> tn_buf;
     tn_buf.resize(GGML_MAX_NAME);
-    auto tn = [&arch, &tn_buf](const char * key) -> const char * {
+    auto tn = [&tn_buf](const char * key) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", key);
         return tn_buf.data();
     };
-    auto tni = [&arch, &tn_buf](const char * key, int bid) -> const char * {
+    auto tni = [&tn_buf](const char * key, int bid) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), key, bid);
         std::string s = tn_buf.data();
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", s.c_str());
@@ -2194,7 +2197,7 @@ int main(int argc, char ** argv) {
         ggml_set_no_alloc(ctx0, false);

         // don't use alloc for input tensors, so we can safely fill them with data
-        struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
+        //struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
         //struct ggml_tensor * after_opt_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
         struct ggml_tensor * tokens_input = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
         struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
diff --git a/ggml.c b/ggml.c
index 8dc37433e..9a787863d 100644
--- a/ggml.c
+++ b/ggml.c
@@ -9448,6 +9448,8 @@ static void ggml_compute_forward_div_f32(

 #ifdef GGML_USE_ACCELERATE
+            UNUSED(ggml_vec_div_f32);
+
             vDSP_vdiv(
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
@@ -13936,7 +13938,7 @@ static void ggml_compute_forward_flash_attn_f32(
                     vvexpf(S, S, &Mup);
                     ggml_vec_sum_f32(Mup, &sum, S);
 #else
-                    uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                    uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                     ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                     for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -14530,7 +14532,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
                     vvexpf(SM, SM, &Mup);
                     ggml_vec_sum_f32(Mup, &sum, SM);
 #else
-                    uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                    uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                     ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                     for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -15330,7 +15332,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     st[i] = 0.0f;
@@ -15410,7 +15412,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         return;
     }

-    const double eps = 1e-9f;
+    const double eps = 1e-9;

     // TODO: handle transposed/permuted matrices
     const int64_t nc = src0->ne[0];
@@ -15444,7 +15446,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     ds0[i] = 0.0f;
@@ -18495,7 +18497,7 @@ static enum ggml_opt_result ggml_opt_adam(
             const int64_t ne = ggml_nelements(ps[p]);
             for (int64_t j = 0; j < ne; ++j) {
                 float g = ggml_get_f32_1d(ps[p]->grad, j);
-                sum += g*g;
+                sum += (ggml_float)(g*g);
             }
         }
         ggml_float norm = sqrt(sum);
@@ -18508,7 +18510,7 @@ static enum ggml_opt_result ggml_opt_adam(
         int64_t i = 0;
         for (int p = 0; p < np; ++p) {
             const int64_t ne = ggml_nelements(ps[p]);
-            const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0) * sched;
+            const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
             for (int64_t j = 0; j < ne; ++j) {
                 float x = ggml_get_f32_1d(ps[p], j);
                 float g = ggml_get_f32_1d(ps[p]->grad, j)*gnorm;
diff --git a/llama.cpp b/llama.cpp
index 11697ee65..7cb468538 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6248,7 +6248,6 @@ const char * llama_print_system_info(void) {
 }

 void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
-    fprintf(stream, "\n");
     fprintf(stream, "###########\n");
     fprintf(stream, "# Timings #\n");
     fprintf(stream, "###########\n");
@@ -6264,10 +6263,10 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
     fprintf(stream, "n_eval: %d # number of tokens generated (excluding the first one)\n", ctx->n_eval);
     fprintf(stream, "n_p_eval: %d # number of tokens processed in batches at the beginning\n", ctx->n_p_eval);
     fprintf(stream, "n_sample: %d # number of sampled tokens\n", ctx->n_sample);
-    fprintf(stream, "t_eval_us: %ld # total microseconds spent generating tokens\n", ctx->t_eval_us);
-    fprintf(stream, "t_load_us: %ld # total microseconds spent loading the model\n", ctx->t_load_us);
-    fprintf(stream, "t_p_eval_us: %ld # total microseconds spent prompt processing\n", ctx->t_p_eval_us);
-    fprintf(stream, "t_sample_us: %ld # total microseconds spent sampling\n", ctx->t_sample_us);
+    fprintf(stream, "t_eval_us: %" PRId64 " # total microseconds spent generating tokens\n", ctx->t_eval_us);
+    fprintf(stream, "t_load_us: %" PRId64 " # total microseconds spent loading the model\n", ctx->t_load_us);
+    fprintf(stream, "t_p_eval_us: %" PRId64 " # total microseconds spent prompt processing\n", ctx->t_p_eval_us);
+    fprintf(stream, "t_sample_us: %" PRId64 " # total microseconds spent sampling\n", ctx->t_sample_us);
     fprintf(stream, "ts_eval: %.2f # tokens / second during generation\n",
             1.0e6 * ctx->n_eval / ctx->t_eval_us);
     fprintf(stream, "ts_p_eval: %.2f # tokens / second during prompt processing\n",
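The recurring fix in the common.cpp and llama.cpp hunks above is the same one: int64_t values were printed with "%ld", which is only correct where long is 64 bits wide, so the patch switches to the PRId64 macro and, in get_sortable_timestamp, enlarges the buffer so the size passed to snprintf no longer exceeds it. A small self-contained sketch of that pattern, independent of the llama.cpp sources (the names here are illustrative only):

    // Portable printing of int64_t: "%09ld" warns or misbehaves on platforms
    // where long is 32-bit (e.g. LLP64 Windows); PRId64 from <inttypes.h>
    // always expands to the correct conversion specifier.
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        const int64_t ns = 123456789;   // nanosecond remainder, always < 1e9

        // 9 digits plus the terminating NUL need at least 10 bytes, and the
        // size passed to snprintf must not exceed the actual buffer size.
        char timestamp_ns[11];
        snprintf(timestamp_ns, sizeof(timestamp_ns), "%09" PRId64, ns);

        printf("fraction: .%s\n", timestamp_ns);
        return 0;
    }

The UNUSED(...) additions in ggml.c serve the same warning-silencing purpose: referencing an otherwise-unused symbol (such as ggml_vec_div_f32 when the Accelerate path is compiled in) in a no-op expression so that unused-variable/function warnings do not fire in the code paths that are compiled out.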