From f1d51d144b8c97a775410f3ee4a9dffd3f7dd3dd Mon Sep 17 00:00:00 2001 From: xaedes Date: Sat, 6 May 2023 14:13:55 +0200 Subject: [PATCH] train on multiple examples, generate & print tokens with trained model afterwards ctx0 for evaluation and optimization is renewed for each sample --- examples/baby-llama/baby-llama.cpp | 153 +++++++++++++++++++++++------ 1 file changed, 121 insertions(+), 32 deletions(-) diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp index 0e5b3fbd7..ad5817c86 100644 --- a/examples/baby-llama/baby-llama.cpp +++ b/examples/baby-llama/baby-llama.cpp @@ -534,6 +534,14 @@ void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, str } } +void print_probs1(struct ggml_tensor * probs, int i) { + for (int k = 0; k < probs->ne[0]; ++k) { + float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k); + printf(" %.2f", p); + } + printf("\n"); +} + void print_probs(struct ggml_tensor * probs) { assert(probs->n_dims == 2); for (int i=0; i<probs->ne[1]; ++i) { @@ -545,28 +553,34 @@ void print_probs(struct ggml_tensor * probs) { } } +void print_token(int token, int n_vocab) { + for (int k = 0; k < token; ++k) { + printf(" "); + } + printf("X"); + for (int k = token+1; k < n_vocab; ++k) { + printf(" "); + } + printf("\n"); +} + void print_tokens(struct ggml_tensor * tokens, int n_vocab) { for (int i=0; i<tokens->ne[0]; ++i) { int token = ggml_get_i32_1d(tokens, i); - for (int k = 0; k < token; ++k) { - printf(" "); - } - printf("X"); - for (int k = token+1; k < n_vocab; ++k) { - printf(" "); - } - printf("\n"); + print_token(token, n_vocab); } } void get_example_targets(int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets) { int n_tokens = tokens_input->ne[0]; int n_vocab = targets->ne[0]; + float randomness = 0.0f; ggml_set_zero(targets); for (int i=0; i 1.0f) ?
1.0f : z; // clamp to [0..1] int token = (int)(z*(float)(n_vocab-1)); ggml_set_f32_1d(targets, i*n_vocab + token, +1.0f); @@ -574,6 +588,17 @@ void get_example_targets(int example_id, struct ggml_tensor * tokens_input, stru } } +void lshift_examples(struct ggml_tensor * tokens_input, struct ggml_tensor * targets, int n_shift) { + int n_tokens = tokens_input->ne[0]; + int n_vocab = targets->ne[0]; + for (int i=0; i