From 93535a460a6850f639e81151f955d0799244c5de Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 28 Aug 2023 22:26:10 +0300
Subject: [PATCH] train : fix compile warnings

---
 common/common.cpp                                |  5 +--
 .../convert-llama2c-to-ggml.cpp                  |  1 -
 .../train-text-from-scratch.cpp                  | 31 ++++++++++---------
 ggml.c                                           | 16 +++++-----
 llama.cpp                                        |  9 +++---
 5 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 4a0d43c13..90fe2e84e 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include <cinttypes>

 #if defined(__APPLE__) && defined(__MACH__)
 #include
@@ -938,8 +939,8 @@ std::string get_sortable_timestamp() {

     const int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
         current_time.time_since_epoch() % 1000000000).count();
-    char timestamp_ns[10];
-    snprintf(timestamp_ns, 11, "%09ld", ns);
+    char timestamp_ns[11];
+    snprintf(timestamp_ns, 11, "%09" PRId64, ns);

     return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
 }
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 51d90ea6a..e9e070b1f 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -681,7 +681,6 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod

     // for rms-att-weight
     int row_length = model->hparams.n_embd;
-    const auto & hparams = model->hparams;
     int n_ff = model->hparams.n_ff;

     for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index c9bba95c7..6fe85d419 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -314,15 +314,13 @@ void init_model(struct my_llama_model * model) {
     model->train_samples = 0;
     model->train_tokens = 0;

-    const char * arch = "llama";
-
     std::vector<char> tn_buf;
     tn_buf.resize(GGML_MAX_NAME);
-    auto tn = [arch, &tn_buf](const char * key) -> const char * {
+    auto tn = [&tn_buf](const char * key) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", key);
         return tn_buf.data();
     };
-    auto tni = [arch, &tn_buf](const char * key, int bid) -> const char * {
+    auto tni = [&tn_buf](const char * key, int bid) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), key, bid);
         std::string s = tn_buf.data();
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", s.c_str());
@@ -470,7 +468,7 @@ static size_t hash_find(void * hash_table[], void * p) {
 }

 static bool hash_insert(void * hash_table[], void * p) {
-    size_t h = hash(p);
+    //size_t h = hash(p);
     size_t i = hash_find(hash_table, p);

     GGML_ASSERT(i < GGML_GRAPH_HASHTABLE_SIZE); // assert that not full
@@ -494,7 +492,7 @@ struct hash_map {
     void * keys[GGML_GRAPH_HASHTABLE_SIZE];
     void * vals[GGML_GRAPH_HASHTABLE_SIZE];
 };
-static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
+//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);

 struct hash_map * new_hash_map() {
     struct hash_map * result = new struct hash_map;
@@ -677,7 +675,6 @@ struct ggml_tensor * llama_build_train_graphs(
     const float f_norm_rms_eps  = hparams.f_norm_rms_eps;
     const float rope_freq_base  = hparams.rope_freq_base;
     const float rope_freq_scale = hparams.rope_freq_scale;
-    const int rope_mode = 0;

     auto set_name = [](struct ggml_tensor * t, const char * n) {
         ggml_set_name(t, n);
@@ -687,8 +684,12 @@ struct ggml_tensor * llama_build_train_graphs(
     };

     // rope has so much parameters that we make a custom function for it
-    auto rope = [ctx, n_past, n_rot, rope_mode, n_ctx, rope_freq_base, rope_freq_scale]
+    auto rope = [ctx, n_rot, n_ctx, rope_freq_base, rope_freq_scale]
                 (struct ggml_tensor * t) -> struct ggml_tensor * {
+        // not capturing these, to silence warnings
+        const int n_past = 0;
+        const int rope_mode = 0;
+
         return ggml_rope_custom(ctx,
             t, n_past, n_rot, rope_mode, n_ctx,
             rope_freq_base, rope_freq_scale);
@@ -803,14 +804,14 @@ struct ggml_tensor * llama_build_train_graphs(
         }
         // allocating checkpoints in one block to reduce memory fragmentation
         // note: they will be freed in reverse order
-        for (int i = 0; i < checkpoints.size(); ++i) {
+        for (int i = 0; i < (int) checkpoints.size(); ++i) {
             if (checkpoints[i]->data == NULL && !ggml_is_view(checkpoints[i])) {
                 ggml_allocr_alloc(alloc, checkpoints[i]);
             }
         }

-        int n_leafs_after = gb->n_leafs;
-        int n_nodes_after = gb->n_nodes;
+        //int n_leafs_after = gb->n_leafs;
+        //int n_nodes_after = gb->n_nodes;

         ggml_allocr_alloc_graph(alloc, gb);

@@ -1061,6 +1062,8 @@ bool are_same_layout(struct ggml_tensor * a, struct ggml_tensor * b) {
     GGML_ASSERT(a->type == b->type);
     GGML_ASSERT(ggml_are_same_shape(a, b));
     GGML_ASSERT(ggml_is_contiguous(a) && ggml_is_contiguous(b));
+
+    return true;
 }

 void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) {
@@ -1217,11 +1220,11 @@ void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_g

     std::vector<char> tn_buf;
     tn_buf.resize(GGML_MAX_NAME);
-    auto tn = [&arch, &tn_buf](const char * key) -> const char * {
+    auto tn = [&tn_buf](const char * key) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", key);
         return tn_buf.data();
     };
-    auto tni = [&arch, &tn_buf](const char * key, int bid) -> const char * {
+    auto tni = [&tn_buf](const char * key, int bid) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), key, bid);
         std::string s = tn_buf.data();
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", s.c_str());
@@ -2194,7 +2197,7 @@ int main(int argc, char ** argv) {
         ggml_set_no_alloc(ctx0, false);

         // don't use alloc for input tensors, so we can safely fill them with data
-        struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
+        //struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
         //struct ggml_tensor * after_opt_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
         struct ggml_tensor * tokens_input = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
         struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
diff --git a/ggml.c b/ggml.c
index 8dc37433e..9a787863d 100644
--- a/ggml.c
+++ b/ggml.c
@@ -9448,6 +9448,8 @@ static void ggml_compute_forward_div_f32(

 #ifdef GGML_USE_ACCELERATE
+            UNUSED(ggml_vec_div_f32);
+
             vDSP_vdiv(
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
@@ -13936,7 +13938,7 @@ static void ggml_compute_forward_flash_attn_f32(
                     vvexpf(S, S, &Mup);
                     ggml_vec_sum_f32(Mup, &sum, S);
 #else
-                    uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                    uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                     ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                     for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -14530,7 +14532,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
                     vvexpf(SM, SM, &Mup);
                     ggml_vec_sum_f32(Mup, &sum, SM);
 #else
-                    uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                    uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                     ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                     for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -15330,7 +15332,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     st[i] = 0.0f;
@@ -15410,7 +15412,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         return;
     }

-    const double eps = 1e-9f;
+    const double eps = 1e-9;

     // TODO: handle transposed/permuted matrices
     const int64_t nc = src0->ne[0];
@@ -15444,7 +15446,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     ds0[i] = 0.0f;
@@ -18495,7 +18497,7 @@ static enum ggml_opt_result ggml_opt_adam(
             const int64_t ne = ggml_nelements(ps[p]);
             for (int64_t j = 0; j < ne; ++j) {
                 float g = ggml_get_f32_1d(ps[p]->grad, j);
-                sum += g*g;
+                sum += (ggml_float)(g*g);
             }
         }
         ggml_float norm = sqrt(sum);
@@ -18508,7 +18510,7 @@ static enum ggml_opt_result ggml_opt_adam(
         int64_t i = 0;
         for (int p = 0; p < np; ++p) {
             const int64_t ne = ggml_nelements(ps[p]);
-            const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0) * sched;
+            const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
             for (int64_t j = 0; j < ne; ++j) {
                 float x = ggml_get_f32_1d(ps[p], j);
                 float g = ggml_get_f32_1d(ps[p]->grad, j)*gnorm;
diff --git a/llama.cpp b/llama.cpp
index 11697ee65..7cb468538 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6248,7 +6248,6 @@ const char * llama_print_system_info(void) {
 }

 void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
-    fprintf(stream, "\n");
     fprintf(stream, "###########\n");
     fprintf(stream, "# Timings #\n");
     fprintf(stream, "###########\n");
@@ -6264,10 +6263,10 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
     fprintf(stream, "n_eval: %d # number of tokens generated (excluding the first one)\n", ctx->n_eval);
     fprintf(stream, "n_p_eval: %d # number of tokens processed in batches at the beginning\n", ctx->n_p_eval);
     fprintf(stream, "n_sample: %d # number of sampled tokens\n", ctx->n_sample);
-    fprintf(stream, "t_eval_us: %ld # total microseconds spent generating tokens\n", ctx->t_eval_us);
-    fprintf(stream, "t_load_us: %ld # total microseconds spent loading the model\n", ctx->t_load_us);
-    fprintf(stream, "t_p_eval_us: %ld # total microseconds spent prompt processing\n", ctx->t_p_eval_us);
-    fprintf(stream, "t_sample_us: %ld # total microseconds spent sampling\n", ctx->t_sample_us);
+    fprintf(stream, "t_eval_us: %" PRId64 " # total microseconds spent generating tokens\n", ctx->t_eval_us);
+    fprintf(stream, "t_load_us: %" PRId64 " # total microseconds spent loading the model\n", ctx->t_load_us);
+    fprintf(stream, "t_p_eval_us: %" PRId64 " # total microseconds spent prompt processing\n", ctx->t_p_eval_us);
+    fprintf(stream, "t_sample_us: %" PRId64 " # total microseconds spent sampling\n", ctx->t_sample_us);
     fprintf(stream, "ts_eval: %.2f # tokens / second during generation\n",
             1.0e6 * ctx->n_eval / ctx->t_eval_us);
     fprintf(stream, "ts_p_eval: %.2f # tokens / second during prompt processing\n",
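The recurring fix in the common.cpp and llama.cpp hunks above is the same one: int64_t values were printed with "%ld", which is only correct where long is 64 bits wide, so the patch switches to the PRId64 macro and, in get_sortable_timestamp, enlarges the buffer so the size passed to snprintf no longer exceeds it. A small self-contained sketch of that pattern, independent of the llama.cpp sources (the names here are illustrative only):

    // Portable printing of int64_t: "%09ld" warns or misbehaves on platforms
    // where long is 32-bit (e.g. LLP64 Windows); PRId64 from <inttypes.h>
    // always expands to the correct conversion specifier.
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        const int64_t ns = 123456789;   // nanosecond remainder, always < 1e9

        // 9 digits plus the terminating NUL need at least 10 bytes, and the
        // size passed to snprintf must not exceed the actual buffer size.
        char timestamp_ns[11];
        snprintf(timestamp_ns, sizeof(timestamp_ns), "%09" PRId64, ns);

        printf("fraction: .%s\n", timestamp_ns);
        return 0;
    }

The UNUSED(...) additions in ggml.c serve the same warning-silencing purpose: referencing an otherwise-unused symbol (such as ggml_vec_div_f32 when the Accelerate path is compiled in) in a no-op expression so that unused-variable/function warnings do not fire in the code paths that are compiled out.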