minor : fix compile warnings + minor style changes

2023-06-11 11:49:01 +03:00 · 2023-06-11 11:49:01 +03:00 · e829421eda
commit e829421eda
parent 6b7487d104
3 changed files with 39 additions and 41 deletions
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -953,7 +953,7 @@ struct ggml_tensor * forward_batch_wo_cache(
    const int N = n_tokens;

    const auto & hparams = model->hparams;
-    const int n_ctx   = hparams.n_ctx;
+    //const int n_ctx   = hparams.n_ctx;
    const int n_vocab = hparams.n_vocab;
    const int n_embd  = hparams.n_embd;
    const int n_layer = hparams.n_layer;
@@ -1181,7 +1181,7 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn(
    const int N = n_tokens;

    const auto & hparams = model->hparams;
-    const int n_ctx   = hparams.n_ctx;
+    //const int n_ctx   = hparams.n_ctx;
    const int n_vocab = hparams.n_vocab;
    const int n_embd  = hparams.n_embd;
    const int n_layer = hparams.n_layer;
@@ -1368,7 +1368,7 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn_train(
    gf->work = NULL;

    const auto & hparams = model->hparams;
-    const int n_ctx      = hparams.n_ctx;
+    //const int n_ctx      = hparams.n_ctx;
    const int n_vocab    = hparams.n_vocab;
    const int n_embd     = hparams.n_embd;
    const int n_layer    = hparams.n_layer;
@@ -1894,7 +1894,7 @@ void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {

 void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens) {
    for (int i1=0; i1<tokens->ne[1]; ++i1) {
-        int num_newline = 0;
+        //int num_newline = 0;
        for (int i0=0; i0<tokens->ne[0]; ++i0) {
            int token = get_i32_2d(tokens, i0, i1);
            print_token(ctx, token);
@@ -1920,7 +1920,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
    int n_tokens = tokens_input->ne[0];
    int n_vocab  = target_logits->ne[0];

-    int sample = train_samples[example_id % n_train_samples];
+    size_t sample = train_samples[example_id % n_train_samples];
    GGML_ASSERT(sample+n_tokens-1 < n_train_data);

    ggml_set_f32(target_logits, -1.0f/n_vocab);
@@ -1936,7 +1936,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
    }
 }

-void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
+void get_example_targets_batch(struct llama_context * /*lctx*/, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
    GGML_ASSERT(tokens_input->n_dims  == 2);
    GGML_ASSERT(target_logits->n_dims == 3);
    GGML_ASSERT(target_probs->n_dims  == 3);
@@ -1953,7 +1953,7 @@ void get_example_targets_batch(struct llama_context * lctx, const int * train_sa
    ggml_set_f32(target_probs, 0.0f);
    for (int k=0; k<n_batch; ++k) {
        // printf("%s: batch %d\n", __func__, k);
-        int sample = train_samples[(example_id*n_batch + k) % n_train_samples];
+        size_t sample = train_samples[(example_id*n_batch + k) % n_train_samples];
        GGML_ASSERT(sample+n_tokens-1 < n_train_data);

        set_i32_2d(tokens_input, 0, k, llama_token_bos());
@@ -2120,7 +2120,7 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
    if (verify) {
        const char * in  = buf.data();
        const char * end = buf.data() + buf.size();
-        for (int i=0; i < out.size(); ++i) {
+        for (int i = 0; i < (int) out.size(); ++i) {
            const char * s = llama_token_to_str(lctx, out[i]);
            int len = strlen(s);
            if (in >= end) {
@@ -2264,7 +2264,7 @@ llama_token sample(struct my_llama_sampler * sampler, float * logits, const llam
 }

 void set_logits_masked(struct ggml_tensor * logits, std::vector<bool>& mask, float value) {
-    GGML_ASSERT(logits->ne[0] == mask.size());
+    GGML_ASSERT(logits->ne[0] == (int64_t) mask.size());
    for (int i2 = 0; i2 < logits->ne[2]; ++i2) {
        for (int i1 = 0; i1 < logits->ne[1]; ++i1) {
            for (int i0 = 0; i0 < logits->ne[0]; ++i0) {
@@ -2301,7 +2301,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
 }

 void read_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
-    uint32_t nd = file->read_u32();
+    int32_t nd = file->read_u32();
    GGML_ASSERT(nd == tensor->n_dims);

    uint32_t name_len       = file->read_u32();
@@ -3003,7 +3003,7 @@ int main(int argc, char ** argv) {
    if (tokenize_file(lctx, params.fn_train_data, train_tokens) < 0) {
        fprintf(stderr, "%s: failed to tokenize file '%s'\n", __func__, params.fn_train_data);
    }
-    printf("%s: number of training tokens: %d\n", __func__, train_tokens.size());
+    printf("%s: number of training tokens: %d\n", __func__, (int) train_tokens.size());

    struct my_llama_model model;
    model.hparams.n_vocab = llama_n_vocab(lctx);
@@ -3020,7 +3020,7 @@ int main(int argc, char ** argv) {
    std::vector<bool>   token_notavail;
    token_noccurs.resize(model.hparams.n_vocab, 0);
    token_notavail.resize(model.hparams.n_vocab, true);
-    for (int i=0; i<train_tokens.size(); ++i) {
+    for (int i = 0; i < (int) train_tokens.size(); ++i) {
        ++token_noccurs[train_tokens[i]];
        token_notavail[train_tokens[i]] = false;
    }
@@ -3028,7 +3028,7 @@ int main(int argc, char ** argv) {
    std::vector<float> token_freq;
    token_freq.resize(model.hparams.n_vocab, 0);
    int n_unique_tokens = 0;
-    for (int i=0; i<token_noccurs.size(); ++i) {
+    for (int i = 0; i < (int) token_noccurs.size(); ++i) {
        token_freq[i] = (float) token_noccurs[i] / (float) train_tokens.size();
        n_unique_tokens += (token_noccurs[i] > 0) ? 1 : 0;
    }
@@ -3104,26 +3104,26 @@ int main(int argc, char ** argv) {
    uint8_t * compute_buf_1 = new uint8_t[size_buf_1];
    uint8_t * compute_buf_2 = new uint8_t[size_buf_2];

-    GGML_ASSERT(train_tokens.size() > n_tokens);;
+    GGML_ASSERT(n_tokens < (int) train_tokens.size());
    std::vector<int> train_samples;
    train_samples.push_back(0);
-    for (int i=1; i<train_tokens.size()-n_tokens; ++i) {
+    for (int i = 1; i < (int) train_tokens.size() - n_tokens; ++i) {
        if (!params.samples_start_after_nl || (train_tokens[i-1] == llama_token_nl())) {
            train_samples.push_back(i);
        }
    }
    shuffle_ints(train_samples.data(), train_samples.data() + train_samples.size());
-    for (int i=0; i<train_samples.size(); ++i) {
-        GGML_ASSERT(train_samples[i]+n_tokens-1 < train_tokens.size());
+    for (int i = 0; i < (int) train_samples.size(); ++i) {
+        GGML_ASSERT(train_samples[i]+n_tokens-1 < (int) train_tokens.size());
    }

    printf("%s: begin training\n", __func__);

-    for (int ex=0; ex<params.n_examples; ++ex) {
-        if (ex*n_batch >= train_samples.size()) {
+    for (int ex = 0; ex < params.n_examples; ++ex) {
+        if (ex*n_batch >= (int) train_samples.size()) {
            shuffle_ints(train_samples.data(), train_samples.data() + train_samples.size());
-            for (int i=0; i<train_samples.size(); ++i) {
-                GGML_ASSERT(train_samples[i]+n_tokens-1 < train_tokens.size());
+            for (int i = 0; i < (int) train_samples.size(); ++i) {
+                GGML_ASSERT(train_samples[i]+n_tokens-1 < (int) train_tokens.size());
            }
        }

@@ -3134,11 +3134,11 @@ int main(int argc, char ** argv) {
        };
        struct ggml_context * ctx0 = ggml_init(cparams);

-        struct ggml_tensor * after_opt_best_samples  = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
-        struct ggml_tensor * after_opt_probs         = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab,  n_tokens, n_batch);
-        struct ggml_tensor * tokens_input            = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
-        struct ggml_tensor * target_logits           = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab,  n_tokens, n_batch);
-        struct ggml_tensor * target_probs            = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab,  n_tokens, n_batch);
+        struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
+        //struct ggml_tensor * after_opt_probs        = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab,  n_tokens, n_batch);
+        struct ggml_tensor * tokens_input           = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
+        struct ggml_tensor * target_logits          = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab,  n_tokens, n_batch);
+        struct ggml_tensor * target_probs           = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab,  n_tokens, n_batch);

        int n_past = 0;

@@ -3293,8 +3293,8 @@ int main(int argc, char ** argv) {
            ggml_build_forward_expand(&gf, logits);
            ggml_graph_compute(ctx0, &gf);

-            struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);
-            struct ggml_tensor * probs        = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_vocab, sample_ctx);
+            //struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);
+            //struct ggml_tensor * probs        = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_vocab, sample_ctx);

            // set_logits_masked(logits, token_notavail, -1e9);
            int token = sample(&sampler,
--- a/ggml.c
+++ b/ggml.c
@@ -10498,7 +10498,7 @@ static void ggml_compute_forward_out_prod_f32(
    const int64_t ne03 = src0->ne[3];

    const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
+    //const int64_t ne11 = src1->ne[1];
    const int64_t ne12 = src1->ne[2];
    const int64_t ne13 = src1->ne[3];

@@ -10587,11 +10587,10 @@ static void ggml_compute_forward_out_prod_f32(
        const int64_t i02 = i2;
        const int64_t i03 = i3;

-        const int64_t i10 = i1;
+        //const int64_t i10 = i1;
        const int64_t i12 = i2;
        const int64_t i13 = i3;

-
        for (int64_t i01 = 0; i01 < ne01; ++i01) {
            const int64_t i11 = i01;

@@ -13956,8 +13955,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
        return;
    }

-    const float eps = 1e-9f;
-
+    const double eps = 1e-9;

    // rows per thread
    const int dr = (nr + nth - 1)/nth;
@@ -14002,7 +14000,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
            // sum = 1.0/sum;
        }
        // avoid log(0) by rescaling from [0..1] to [eps..1]
-        sum = (1.0f - eps) / sum;
+        sum = (1.0 - eps) / sum;
        ggml_vec_scale_f32(nc, st, sum);
        ggml_vec_add1_f32(nc, st, st, eps);
        ggml_vec_log_f32(nc, st, st);
@@ -14054,8 +14052,6 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
    const int64_t ith = params->ith;
    const int64_t nth = params->nth;

-    float * sums = (float *) params->wdata;
-
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }
@@ -14090,6 +14086,8 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
 #endif
        // step by step explanation:
        {
+            //float * sums = (float *) params->wdata;
+
            // forward pass with annotated gradients from backward pass
            // (built by going in reverse operation order, adding to gradients of current operation args)
            // st0 = exp(s0-max(s0))                                                       grad[st0] = grad[st1]*(1.0 - eps)/sum
@@ -14162,10 +14160,10 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
        float dot_st1_dst1 = 0;
        ggml_vec_scale_f32(nc, sm, sum);
        ggml_vec_cpy_f32  (nc, ds0, sm);
-        ggml_vec_scale_f32(nc, ds0, (1.0 - eps));
+        ggml_vec_scale_f32(nc, ds0, (1.0f - eps));
        ggml_vec_add1_f32 (nc, ds0, ds0, eps);
        ggml_vec_div_f32  (nc, ds0, s1, ds0);
-        ggml_vec_scale_f32(nc, ds0, -(1.0 - eps)*d[0]);
+        ggml_vec_scale_f32(nc, ds0, -(1.0f - eps)*d[0]);
        ggml_vec_dot_f32  (nc, &dot_st1_dst1, sm, ds0);
        ggml_vec_acc1_f32 (nc, ds0, -dot_st1_dst1);
        ggml_vec_mul_f32  (nc, ds0, ds0, sm);
--- a/llama.h
+++ b/llama.h
@@ -193,9 +193,9 @@ extern "C" {
    // Returns number of results.
    LLAMA_API int llama_get_vocab(
            const struct llama_context * ctx,
-            const char * * strings,
-            float  * scores,
-            int capacity);
+                          const char * * strings,
+                                 float * scores,
+                                   int   capacity);

    // Token logits obtained from the last call to llama_eval()
    // The logits for the last token are stored in the last row