From d3acbf644e96fb5dd18d2bc9f4dd119c732a8f17 Mon Sep 17 00:00:00 2001
From: xaedes
Date: Mon, 22 May 2023 20:53:57 +0200
Subject: [PATCH] simplify code

---
 examples/baby-llama/baby-llama-text.cpp | 222 +++++++++++++-----------
 1 file changed, 122 insertions(+), 100 deletions(-)

diff --git a/examples/baby-llama/baby-llama-text.cpp b/examples/baby-llama/baby-llama-text.cpp
index 84ef911f8..9a193b81d 100644
--- a/examples/baby-llama/baby-llama-text.cpp
+++ b/examples/baby-llama/baby-llama-text.cpp
@@ -1144,58 +1144,9 @@ struct ggml_tensor * forward_batch_wo_cache(
     return inpL;
 }
 
-void print_row(struct ggml_tensor * probs, int i) {
-    for (int k = 0; k < probs->ne[0]; ++k) {
-        float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
-        printf(" %.2f", p);
-    }
-    printf("\n");
-}
-
-void print_matrix(struct ggml_tensor * probs) {
-    assert(probs->n_dims == 2);
-    for (int i = 0; i < probs->ne[1]; ++i) {
-        for (int k = 0; k < probs->ne[0]; ++k) {
-            float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
-            printf(" %.2f", p);
-        }
-        printf("\n");
-    }
-}
-
-
-void print_token(struct llama_context * ctx, llama_token token) {
-    printf("%s", llama_token_to_str(ctx, token));
-}
-
-void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {
-    for (int i=0; i<tokens->ne[0]; ++i) {
-        int token = ggml_get_i32_1d(tokens, i);
-        print_token(ctx, token);
-    }
-}
-
-void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens) {
-    for (int i1=0; i1<tokens->ne[1]; ++i1) {
-        int num_newline = 0;
-        for (int i0=0; i0<tokens->ne[0]; ++i0) {
-            int token = ggml_get_i32_1d(tokens, i0 + i1*tokens->ne[0]);
-            bool isnl = (token == llama_token_nl());
-            if (isnl) {
-                ++num_newline;
-            }
-            if (isnl) {
-                if (num_newline < 2) {
-                    print_token(ctx, token);
-                } else {
-                    printf("\\n");
-                }
-            } else {
-                print_token(ctx, token);
-            }
-        }
-        printf("\n--\n");
-    }
+void set_f32_3d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int64_t i2, float value) {
+    float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2]);
+    *ptr = value;
 }
 
 void set_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, float value) {
@@ -1218,13 +1169,65 @@ int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
     return *ptr;
 }
 
+void print_row(struct ggml_tensor * probs, int i) {
+    for (int k = 0; k < probs->ne[0]; ++k) {
+        float p = get_f32_2d(probs, k, i);
+        printf(" %.2f", p);
+    }
+    printf("\n");
+}
+
+void print_matrix(struct ggml_tensor * probs) {
+    assert(probs->n_dims == 2);
+    for (int i = 0; i < probs->ne[1]; ++i) {
+        for (int k = 0; k < probs->ne[0]; ++k) {
+            float p = get_f32_2d(probs, k, i);
+            printf(" %.2f", p);
+        }
+        printf("\n");
+    }
+}
+
+
+void print_token(struct llama_context * ctx, llama_token token) {
+    printf("%s", llama_token_to_str(ctx, token));
+}
+
+void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {
+    for (int i=0; i<tokens->ne[0]; ++i) {
+        int token = ggml_get_i32_1d(tokens, i);
+        print_token(ctx, token);
+    }
+}
+
+void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens) {
+    for (int i1=0; i1<tokens->ne[1]; ++i1) {
+        int num_newline = 0;
+        for (int i0=0; i0<tokens->ne[0]; ++i0) {
+            int token = get_i32_2d(tokens, i0, i1);
+            print_token(ctx, token);
+            // bool isnl = (token == llama_token_nl());
+            // if (isnl) {
+            //     ++num_newline;
+            // }
+            // if (isnl) {
+            //     if (num_newline < 2) {
+            //         print_token(ctx, token);
+            //     } else {
+            //         printf("\\n");
+            //     }
+            // } else {
+            //     print_token(ctx, token);
+            // }
+        }
+        printf("\n--\n");
+    }
+}
+
 void get_example_targets(const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     int n_tokens = tokens_input->ne[0];
     int n_vocab  = target_logits->ne[0];
 
-    const float eps = 1e-6f;
-    const float target_prob = 1.0f;
-
     int sample = train_samples[example_id % n_train_samples];
     GGML_ASSERT(sample+n_tokens-1 < n_train_data);
 
@@ -1241,38 +1244,42 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
     }
 }
 
-void get_example_targets_batch(struct ggml_context * ctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
+void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     GGML_ASSERT(tokens_input->n_dims  == 2);
     GGML_ASSERT(target_logits->n_dims == 3);
     GGML_ASSERT(target_probs->n_dims  == 3);
+    int n_vocab  = target_logits->ne[0];
     int n_tokens = tokens_input->ne[0];
     int n_batch  = tokens_input->ne[1];
     GGML_ASSERT(n_tokens == target_logits->ne[1]);
     GGML_ASSERT(n_batch  == target_logits->ne[2]);
+    GGML_ASSERT(n_vocab  == target_probs->ne[0]);
     GGML_ASSERT(n_tokens == target_probs->ne[1]);
     GGML_ASSERT(n_batch  == target_probs->ne[2]);
 
+    ggml_set_f32(target_logits, -1.0f/n_vocab);
+    ggml_set_f32(target_probs, 0.0f);
     for (int k=0; k<n_batch; ++k) {
-        struct ggml_tensor * tokens_input_k = ggml_view_1d(ctx,
-                                                tokens_input,
-                                                tokens_input->ne[0],
-                                                k*tokens_input->nb[1]);
-        struct ggml_tensor * target_logits_k = ggml_view_2d(ctx,
-                                                target_logits,
-                                                target_logits->ne[0],
-                                                target_logits->ne[1],
-                                                target_logits->nb[1],
-                                                k*target_logits->nb[2]);
-
-        struct ggml_tensor * target_probs_k = ggml_view_2d(ctx,
-                                                target_probs,
-                                                target_probs->ne[0],
-                                                target_probs->ne[1],
-                                                target_probs->nb[1],
-                                                k*target_probs->nb[2]);
-
-        get_example_targets(train_samples, n_train_samples, train_data, n_train_data,
-            example_id*n_batch + k, tokens_input_k, target_logits_k, target_probs_k);
+        // printf("%s: batch %d\n", __func__, k);
+        int sample = train_samples[(example_id*n_batch + k) % n_train_samples];
+        GGML_ASSERT(sample+n_tokens-1 < n_train_data);
+
+        set_i32_2d(tokens_input, 0, k, llama_token_bos());
+        for (int i=1; i<n_tokens+1; ++i) {
+            int token = clamp(train_data[sample+i-1], 0, n_vocab-1);
+            // print_token(lctx, token);
+            set_f32_3d(target_logits, token, i-1, k, +1.0f);
+            set_f32_3d(target_probs,  token, i-1, k, +1.0f);
+            if (i<n_tokens) {
+                set_i32_2d(tokens_input, i, k, token);
+            }
+        }
+        // print_tokens(lctx, tokens_input);
     }
 }
 
@@ ... @@
     if (n_tokens >= 0) {
         out.resize(n_tokens);
     }
 
+    bool verify = false;
+    if (verify) {
+        const char * in  = buf.data();
+        const char * end = buf.data() + buf.size();
+        for (int i=0; i < out.size(); ++i) {
+            const char * s = llama_token_to_str(lctx, out[i]);
+            int len = strlen(s);
+            if (in >= end) {
+                printf("%s: unexpected end of original text.\n", __func__);
+                break;
+            }
+            const bool matches = (strncmp(in, s, len) == 0);
+            if (matches) {
+                in += len;
+            } else {
+                printf("%s: mismatch: expected '%s', but got '%s'\n", __func__, std::string(in, len).c_str(), s);
+            }
+        }
+    }
+
     return n_tokens;
 }
 
@@ -1841,9 +1867,9 @@ float cosine_decay_restart(int decay_steps, const float alpha, int step, float r
 }
 
 int main(int argc, char ** argv) {
-    const char * default_model = "ggml-vic7b-uncensored-q4_0.bin";
-    const char * default_train = "shakespeare.txt";
-    const char * default_chkpt_in = "checkpoint.bin";
+    const char * default_model     = "ggml-vic7b-uncensored-q4_0.bin";
+    const char * default_train     = "shakespeare.txt";
+    const char * default_chkpt_in  = "checkpoint.bin";
"checkpoint.bin"; const char * default_chkpt_out = "checkpoint.bin"; const char * default_argv[5] = {argv[0], default_model, default_train, default_chkpt_in, default_chkpt_out}; @@ -1890,6 +1916,7 @@ int main(int argc, char ** argv) { ++token_noccurs[train_tokens[i]]; token_notavail[train_tokens[i]] = false; } + std::vector token_freq; token_freq.resize(model.hparams.n_vocab, 0); int n_unique_tokens = 0; @@ -1901,10 +1928,9 @@ int main(int argc, char ** argv) { struct my_llama_kv_cache kv_self; - int n_batch = 32; struct ggml_init_params lcparams; - lcparams.mem_size = 1024ll*1024ll*1024ll*8ll; + lcparams.mem_size = 1024ll*1024ll*1024ll*2ll; lcparams.mem_buffer = NULL; lcparams.no_alloc = false; @@ -1913,15 +1939,21 @@ int main(int argc, char ** argv) { my_llama_sampler sampler; + int n_threads = 6; + int n_batch = 32; + int n_examples = 32; - int n_threads = 6; - + bool samples_start_after_nl = false; bool use_adam = true; - int warmup = 100; - int cos_decay_steps = 1000; + int warmup = 100; + int cos_decay_steps = 1000; float cos_decay_restart = 1.1f; - float cos_decay_alpha = 0.0f; + float cos_decay_alpha = 0.0f; + + + int n_tokens = model.hparams.n_ctx; + int n_vocab = model.hparams.n_vocab; struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context)); memset(opt, 0, sizeof(struct ggml_opt_context)); @@ -1965,12 +1997,7 @@ int main(int argc, char ** argv) { size_t compute_size = 1024ll*1024ll*1024ll*32ll; uint8_t * compute_addr = new uint8_t[compute_size]; - - int n_examples = 256; - int n_tokens = model.hparams.n_ctx; - int n_vocab = model.hparams.n_vocab; - bool samples_start_after_nl = false; std::vector train_samples; train_samples.push_back(0); @@ -2012,18 +2039,14 @@ int main(int argc, char ** argv) { ggml_cgraph gf = {}; gf.n_threads = n_threads; - get_example_targets_batch(ctx0, train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), ex, tokens_input, target_logits, target_probs); + get_example_targets_batch(lctx, train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), ex, tokens_input, target_logits, target_probs); struct ggml_tensor * logits = (n_past == 0) ? forward_batch_wo_cache(&model, ctx0, &gf, tokens_input, n_tokens, n_batch) : forward_batch(&model, &kv_self, ctx0, &gf, tokens_input, n_tokens, n_past, n_batch); - // struct ggml_tensor * se = square_error_loss(ctx0, logits, target_logits); - struct ggml_tensor * ce = cross_entropy_loss(ctx0, logits, target_probs); - // struct ggml_tensor * e = ggml_add(ctx0, se, ce); - struct ggml_tensor * e = ce; - // struct ggml_tensor * e = se; + struct ggml_tensor * e = cross_entropy_loss(ctx0, logits, target_probs); ggml_build_forward_expand(&gf, e); ggml_graph_compute(ctx0, &gf); @@ -2043,9 +2066,8 @@ int main(int argc, char ** argv) { size_t used_mem_after_opt = ggml_used_mem(ctx0); model.train_its = opt->iter; - // model.train_its += use_adam ? opt_params_adam.adam.n_iter : opt_params_lbfgs.lbfgs.n_iter; model.train_samples += n_batch; - model.train_tokens += n_batch * n_tokens; + model.train_tokens += n_batch * n_tokens; ggml_build_forward_expand(&gf, e); ggml_graph_compute(ctx0, &gf);