From bc1c13bb6621a6236d8595cf110ac6ad1af0dd96 Mon Sep 17 00:00:00 2001
From: xaedes <xaedes@gmail.com>
Date: Mon, 1 May 2023 22:22:00 +0200
Subject: [PATCH] train with two examples, creating new tensors each time..

---
 examples/baby-llama/baby-llama.cpp | 152 ++++++++++++++++-------------
 1 file changed, 86 insertions(+), 66 deletions(-)

diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index c50baf470..0e5b3fbd7 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -539,7 +539,7 @@ void print_probs(struct ggml_tensor * probs) {
     for (int i=0; i<probs->ne[1]; ++i) {
         for (int k = 0; k < probs->ne[0]; ++k) {
             float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
-            printf(" %.1f", p);
+            printf(" %.2f", p);
         }
         printf("\n");
     }
 }
@@ -559,6 +559,21 @@ void print_tokens(struct ggml_tensor * tokens, int n_vocab) {
     }
 }
 
+void get_example_targets(int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets) {
+    int n_tokens = tokens_input->ne[0];
+    int n_vocab = targets->ne[0];
+    ggml_set_zero(targets);
+    for (int i=0; i<n_tokens; ++i) {
+        float x = example_id + i * 3.14159f * 2.0f * 1.0f * 0.5f / n_tokens;
+        float y = sinf(x);
+        float z = (y+1.0f)*0.5f; // scale to [0..1]
+        z = (z < 0.0f) ? 0.0f : (z > 1.0f) ? 1.0f : z; // clamp to [0..1]
+        int token = (int)(z*(float)(n_vocab-1));
+        ggml_set_f32_1d(targets, i*n_vocab + token, +1.0f);
+        ggml_set_i32_1d(tokens_input, i, token);
+    }
+}
+
 int main(int argc, char ** argv) {
     struct ggml_init_params lcparams;
     lcparams.mem_size = 1024*1024*1024;
@@ -566,19 +581,26 @@ int main(int argc, char ** argv) {
     lcparams.no_alloc = false;
 
     struct llama_model model;
-    model.hparams.n_vocab = 16;
-    model.hparams.n_ctx = 64;
-    model.hparams.n_embd = 64;
+    model.hparams.n_vocab = 8;
+    model.hparams.n_ctx = 32;
+    model.hparams.n_embd = 32;
     model.hparams.n_mult = 2;
     model.hparams.n_head = 8;
-    model.hparams.n_layer = 16;
+    model.hparams.n_layer = 8;
     model.hparams.n_rot = 16;
+
+    // model.hparams.n_embd = 32;
+    // model.hparams.n_mult = 2;
+    // model.hparams.n_head = 4;
+    // model.hparams.n_layer = 8;
+    // model.hparams.n_rot = 8;
+
     model.ctx = ggml_init(lcparams);
     printf("init model\n");
     init_model(&model);
     set_param_model(&model);
 
-    randomize_model(&model, 1337, 0.0f, 2.0f, -1.0f, +1.0f);
+    randomize_model(&model, 1337, 0.0f, 1.0f, -1.0f, +1.0f);
 
     // key + value cache for the self attention
     struct llama_kv_cache kv_self;
@@ -593,68 +615,66 @@ int main(int argc, char ** argv) {
 
     struct ggml_context * ctx0 = model.ctx; // ggml_init(c0params);
 
-    int n_tokens = 64;
-    struct ggml_tensor * before_opt_best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
-    struct ggml_tensor * before_opt_probs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, model.hparams.n_vocab, n_tokens);
-    struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
-    struct ggml_tensor * after_opt_probs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, model.hparams.n_vocab, n_tokens);
-    struct ggml_tensor * tokens_input = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
-    struct ggml_tensor * targets = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, model.hparams.n_vocab, n_tokens);
-    for (int i=0; i