train : fix compile warnings

Georgi Gerganov 2023-08-28 22:26:10 +03:00
parent f6828cba9e
commit 93535a460a
5 changed files with 33 additions and 29 deletions

View file

@@ -15,6 +15,7 @@
 #include <string>
 #include <unordered_set>
 #include <vector>
+#include <cinttypes>

 #if defined(__APPLE__) && defined(__MACH__)
 #include <sys/types.h>
@@ -938,8 +939,8 @@ std::string get_sortable_timestamp() {
     const int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
         current_time.time_since_epoch() % 1000000000).count();
-    char timestamp_ns[10];
-    snprintf(timestamp_ns, 11, "%09ld", ns);
+    char timestamp_ns[11];
+    snprintf(timestamp_ns, 11, "%09" PRId64, ns);

     return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
 }
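
Aside (not part of the commit): %ld assumes int64_t is long, which is false on LLP64 targets such as 64-bit Windows, so -Wformat warns; the PRId64 macro from <cinttypes> always expands to the matching conversion specifier. A minimal, self-contained sketch of the pattern (illustrative values, not code from the repo):

    // sketch: printing an int64_t portably with snprintf and PRId64
    #include <cinttypes>   // PRId64
    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t ns = 123456789;
        char timestamp_ns[11];                                        // 9 zero-padded digits + '\0'
        snprintf(timestamp_ns, sizeof(timestamp_ns), "%09" PRId64, ns);
        printf("%s\n", timestamp_ns);                                 // prints 123456789
        return 0;
    }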

View file

@@ -681,7 +681,6 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
     // for rms-att-weight
     int row_length = model->hparams.n_embd;
-    const auto & hparams = model->hparams;
     int n_ff = model->hparams.n_ff;

     for (uint32_t i = 0; i < model->hparams.n_layer; ++i){

View file

@@ -314,15 +314,13 @@ void init_model(struct my_llama_model * model) {
     model->train_samples = 0;
     model->train_tokens = 0;

-    const char * arch = "llama";
-
     std::vector<char> tn_buf;
     tn_buf.resize(GGML_MAX_NAME);
-    auto tn = [arch, &tn_buf](const char * key) -> const char * {
+    auto tn = [&tn_buf](const char * key) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", key);
         return tn_buf.data();
     };
-    auto tni = [arch, &tn_buf](const char * key, int bid) -> const char * {
+    auto tni = [&tn_buf](const char * key, int bid) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), key, bid);
         std::string s = tn_buf.data();
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", s.c_str());
@@ -470,7 +468,7 @@ static size_t hash_find(void * hash_table[], void * p) {
 }

 static bool hash_insert(void * hash_table[], void * p) {
-    size_t h = hash(p);
+    //size_t h = hash(p);
     size_t i = hash_find(hash_table, p);

     GGML_ASSERT(i < GGML_GRAPH_HASHTABLE_SIZE); // assert that not full
@@ -494,7 +492,7 @@ struct hash_map {
     void * keys[GGML_GRAPH_HASHTABLE_SIZE];
     void * vals[GGML_GRAPH_HASHTABLE_SIZE];
 };

-static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
+//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);

 struct hash_map * new_hash_map() {
     struct hash_map * result = new struct hash_map;
@@ -677,7 +675,6 @@ struct ggml_tensor * llama_build_train_graphs(
     const float f_norm_rms_eps = hparams.f_norm_rms_eps;
     const float rope_freq_base = hparams.rope_freq_base;
     const float rope_freq_scale = hparams.rope_freq_scale;
-    const int rope_mode = 0;

     auto set_name = [](struct ggml_tensor * t, const char * n) {
         ggml_set_name(t, n);
@@ -687,8 +684,12 @@
     };

     // rope has so much parameters that we make a custom function for it
-    auto rope = [ctx, n_past, n_rot, rope_mode, n_ctx, rope_freq_base, rope_freq_scale]
+    auto rope = [ctx, n_rot, n_ctx, rope_freq_base, rope_freq_scale]
         (struct ggml_tensor * t) -> struct ggml_tensor * {
+        // not capturing these, to silcence warnings
+        const int n_past = 0;
+        const int rope_mode = 0;
+
         return ggml_rope_custom(ctx,
             t, n_past, n_rot, rope_mode, n_ctx,
             rope_freq_base, rope_freq_scale);
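
Aside (illustrative, not from the commit): clang's -Wunused-lambda-capture fires when a listed capture is not actually required, e.g. a const int with a constant initializer that the body can read without capturing; hence n_past and rope_mode are dropped from the capture list and declared inside the lambda instead. A self-contained sketch of the pattern:

    // sketch: avoid -Wunused-lambda-capture by declaring constants inside the lambda
    #include <cstdio>

    int main() {
        // const int n_past = 0;  // capturing a const like this would warn: capture not required
        auto rope_like = [](int n_rot) -> int {
            const int n_past = 0;     // local constants instead of captures
            const int rope_mode = 0;
            return n_past + rope_mode + n_rot;
        };
        printf("%d\n", rope_like(64));  // prints 64
        return 0;
    }
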
@@ -803,14 +804,14 @@ struct ggml_tensor * llama_build_train_graphs(
     }

     // allocating checkpoints in one block to reduce memory fragmentation
     // note: they will be freed in reverse order
-    for (int i = 0; i < checkpoints.size(); ++i) {
+    for (int i = 0; i < (int) checkpoints.size(); ++i) {
         if (checkpoints[i]->data == NULL && !ggml_is_view(checkpoints[i])) {
             ggml_allocr_alloc(alloc, checkpoints[i]);
         }
     }

-    int n_leafs_after = gb->n_leafs;
-    int n_nodes_after = gb->n_nodes;
+    //int n_leafs_after = gb->n_leafs;
+    //int n_nodes_after = gb->n_nodes;

     ggml_allocr_alloc_graph(alloc, gb);
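
Aside (illustrative, not from the commit): checkpoints.size() returns size_t, so comparing it against a signed int index triggers -Wsign-compare; the explicit (int) cast used above is one fix, an unsigned loop index is another:

    // sketch: two ways to avoid -Wsign-compare against a container size
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<int> checkpoints = {10, 20, 30};

        for (int i = 0; i < (int) checkpoints.size(); ++i) {   // explicit cast, as in the diff
            printf("%d ", checkpoints[i]);
        }
        for (size_t i = 0; i < checkpoints.size(); ++i) {      // or use an unsigned index
            printf("%d ", checkpoints[i]);
        }
        printf("\n");
        return 0;
    }
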
@@ -1061,6 +1062,8 @@ bool are_same_layout(struct ggml_tensor * a, struct ggml_tensor * b) {
     GGML_ASSERT(a->type == b->type);
     GGML_ASSERT(ggml_are_same_shape(a, b));
     GGML_ASSERT(ggml_is_contiguous(a) && ggml_is_contiguous(b));
+
+    return true;
 }

 void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) {
@@ -1217,11 +1220,11 @@ void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_g
     std::vector<char> tn_buf;
     tn_buf.resize(GGML_MAX_NAME);
-    auto tn = [&arch, &tn_buf](const char * key) -> const char * {
+    auto tn = [&tn_buf](const char * key) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", key);
         return tn_buf.data();
     };
-    auto tni = [&arch, &tn_buf](const char * key, int bid) -> const char * {
+    auto tni = [&tn_buf](const char * key, int bid) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), key, bid);
         std::string s = tn_buf.data();
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", s.c_str());
@@ -2194,7 +2197,7 @@ int main(int argc, char ** argv) {
         ggml_set_no_alloc(ctx0, false);

         // don't use alloc for input tensors, so we can safely fill them with data
-        struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
+        //struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
         //struct ggml_tensor * after_opt_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
         struct ggml_tensor * tokens_input = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
         struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);

ggml.c
View file

@@ -9448,6 +9448,8 @@ static void ggml_compute_forward_div_f32(
 #ifdef GGML_USE_ACCELERATE
+            UNUSED(ggml_vec_div_f32);
+
             vDSP_vdiv(
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
@@ -13936,7 +13938,7 @@ static void ggml_compute_forward_flash_attn_f32(
                 vvexpf(S, S, &Mup);
                 ggml_vec_sum_f32(Mup, &sum, S);
 #else
-                uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                 ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                 for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
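
Aside (illustrative): scvt is only referenced on some build configurations of this code path, so other configurations warn about an unused variable. ggml silences that with its UNUSED macro, essentially a cast to void; a minimal sketch of the idiom, with the macro written out locally:

    // sketch: the (void)-cast idiom behind an UNUSED-style macro
    #include <cstdint>
    #include <cstdio>

    #define UNUSED(x) (void)(x)   // marks x as used without doing anything, silencing -Wunused-variable

    int main() {
        uint16_t scvt[4]; UNUSED(scvt);   // same shape as the lines changed in the diff
        printf("ok\n");
        return 0;
    }
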
@@ -14530,7 +14532,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
                 vvexpf(SM, SM, &Mup);
                 ggml_vec_sum_f32(Mup, &sum, SM);
 #else
-                uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                 ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                 for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -15330,7 +15332,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     st[i] = 0.0f;
@@ -15410,7 +15412,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         return;
     }

-    const double eps = 1e-9f;
+    const double eps = 1e-9;

     // TODO: handle transposed/permuted matrices
     const int64_t nc = src0->ne[0];
@@ -15444,7 +15446,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     ds0[i] = 0.0f;
@@ -18495,7 +18497,7 @@ static enum ggml_opt_result ggml_opt_adam(
                 const int64_t ne = ggml_nelements(ps[p]);
                 for (int64_t j = 0; j < ne; ++j) {
                     float g = ggml_get_f32_1d(ps[p]->grad, j);
-                    sum += g*g;
+                    sum += (ggml_float)(g*g);
                 }
             }
             ggml_float norm = sqrt(sum);
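
Aside (illustrative): sum is a ggml_float, which is a double in ggml.c, so adding the float expression g*g to it widens implicitly; with -Wdouble-promotion style warnings enabled the compiler flags that, and the explicit cast documents the intended widening. A sketch under that assumption:

    // sketch: make the float -> double widening explicit
    #include <cstdio>

    typedef double ggml_float;        // assumption: matches ggml.c's typedef

    int main() {
        float g = 0.5f;
        ggml_float sum = 0.0;
        sum += (ggml_float)(g * g);   // explicit cast, as in the diff
        printf("%f\n", sum);          // prints 0.250000
        return 0;
    }
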
@@ -18508,7 +18510,7 @@ static enum ggml_opt_result ggml_opt_adam(
             int64_t i = 0;
             for (int p = 0; p < np; ++p) {
                 const int64_t ne = ggml_nelements(ps[p]);
-                const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0) * sched;
+                const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
                 for (int64_t j = 0; j < ne; ++j) {
                     float x = ggml_get_f32_1d(ps[p], j);
                     float g = ggml_get_f32_1d(ps[p]->grad, j)*gnorm;
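
Aside (illustrative, not from the commit): the 1e-9 and 0.0f literal changes both match the literal to the destination type. eps is a double, so the float-suffixed 1e-9f triggered a promotion warning; p_decay is a float, so the double literal 0.0 pushed the ternary and the multiply up to double before converting back down. A compact sketch of the rule:

    // sketch: keep literal suffixes consistent with the variable's type
    int main() {
        const double eps     = 1e-9;                          // double variable, unsuffixed double literal
        const float  decay   = 0.1f;
        const float  sched   = 1.0f;
        const bool   use_it  = true;
        const float  p_decay = (use_it ? decay : 0.0f) * sched; // 0.0f keeps the whole expression in float
        return (eps > 0 && p_decay >= 0) ? 0 : 1;
    }
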

View file

@@ -6248,7 +6248,6 @@ const char * llama_print_system_info(void) {
 }

 void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
     fprintf(stream, "\n");
     fprintf(stream, "###########\n");
     fprintf(stream, "# Timings #\n");
@@ -6264,10 +6263,10 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
     fprintf(stream, "n_eval: %d # number of tokens generated (excluding the first one)\n", ctx->n_eval);
     fprintf(stream, "n_p_eval: %d # number of tokens processed in batches at the beginning\n", ctx->n_p_eval);
     fprintf(stream, "n_sample: %d # number of sampled tokens\n", ctx->n_sample);
-    fprintf(stream, "t_eval_us: %ld # total microseconds spent generating tokens\n", ctx->t_eval_us);
-    fprintf(stream, "t_load_us: %ld # total microseconds spent loading the model\n", ctx->t_load_us);
-    fprintf(stream, "t_p_eval_us: %ld # total microseconds spent prompt processing\n", ctx->t_p_eval_us);
-    fprintf(stream, "t_sample_us: %ld # total microseconds spent sampling\n", ctx->t_sample_us);
+    fprintf(stream, "t_eval_us: %" PRId64 " # total microseconds spent generating tokens\n", ctx->t_eval_us);
+    fprintf(stream, "t_load_us: %" PRId64 " # total microseconds spent loading the model\n", ctx->t_load_us);
+    fprintf(stream, "t_p_eval_us: %" PRId64 " # total microseconds spent prompt processing\n", ctx->t_p_eval_us);
+    fprintf(stream, "t_sample_us: %" PRId64 " # total microseconds spent sampling\n", ctx->t_sample_us);
     fprintf(stream, "ts_eval: %.2f # tokens / second during generation\n",
         1.0e6 * ctx->n_eval / ctx->t_eval_us);
     fprintf(stream, "ts_p_eval: %.2f # tokens / second during prompt processing\n",