From be7e564b112baca4581b96275112cf35e59570a7 Mon Sep 17 00:00:00 2001
From: xaedes
Date: Wed, 16 Aug 2023 16:21:43 +0200
Subject: [PATCH] bug fixes to make finetune compile

automatic allocator does not work yet

---
 examples/finetune/finetune.cpp | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index bdc717ffe..d7c0f3623 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -236,6 +236,8 @@ struct my_llama_model {
 };
 
 struct my_llama_lora_hparams {
+    uint32_t lora_r = 1;
+    uint32_t lora_alpha = 1;
     uint32_t n_rank_attention_norm = 1;
     uint32_t n_rank_wq = 4;
     uint32_t n_rank_wk = 4;
@@ -333,7 +335,7 @@ void print_lora_params(struct my_llama_lora_hparams * params) {
     printf("%s: n_rank_output : %u\n", __func__, params->n_rank_output);
 }
 
-void init_model(const struct llama_model * input, struct my_llama_model * model, uint32_t n_ctx) {
+void init_model(struct llama_model * input, struct my_llama_model * model, uint32_t n_ctx) {
     auto & hparams = model->hparams;
 
     hparams.n_vocab = llama_n_vocab_from_model(input);
@@ -350,10 +352,6 @@ void init_model(struct llama_model * input, struct my_llama_model * model,
 
     const uint32_t n_ff = get_n_ff(&hparams);
 
-    model->train_its = 0;
-    model->train_samples = 0;
-    model->train_tokens = 0;
-
     model->tok_embeddings = llama_get_model_tok_embeddings(input);
     model->norm           = llama_get_model_norm(input);
     model->output         = llama_get_model_output(input);
@@ -589,7 +587,7 @@ struct ggml_tensor * forward(
     const int n_head = hparams.n_head;
     const int n_rot = hparams.n_rot;
 
-    GGML_ASSERT(n_layer == lora.layers.size());
+    GGML_ASSERT(n_layer == lora->layers.size());
 
     struct ggml_tensor * tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
     memcpy(tokens->data, tokens_input->data, N*ggml_element_size(tokens));
@@ -801,9 +799,7 @@ struct ggml_tensor * forward(
     // inpL shape [n_embd,N,1,1]
     inpL = ggml_mul(ctx0,
                 ggml_repeat(ctx0,
-                    ggml_add(ctx0,
-                        model->norm,
-                        lora->norm),
+                    norm,
                     inpL),
                 inpL);
 
@@ -1073,7 +1069,7 @@ struct ggml_tensor * llama_build_lora_finetune_graphs(
     const int n_ff = get_n_ff(&hparams);
     const int rope_mode = 0;
 
-    GGML_ASSERT(n_layer == lora.layers.size());
+    GGML_ASSERT(n_layer == lora->layers.size());
 
     auto set_name = [](struct ggml_tensor * t, const char * n) {
         ggml_set_name(t, n);
@@ -1117,6 +1113,7 @@ struct ggml_tensor * llama_build_lora_finetune_graphs(
         struct ggml_tensor * wq = ggml_add(ctx, layer.wq, ggml_mul_mat(ctx, llayer.wq_a, llayer.wq_b));
         struct ggml_tensor * wk = ggml_add(ctx, layer.wk, ggml_mul_mat(ctx, llayer.wk_a, llayer.wk_b));
         struct ggml_tensor * wv = ggml_add(ctx, layer.wv, ggml_mul_mat(ctx, llayer.wv_a, llayer.wv_b));
+        struct ggml_tensor * wo = ggml_add(ctx, layer.wo, ggml_mul_mat(ctx, llayer.wo_a, llayer.wo_b));
         struct ggml_tensor * w1 = ggml_add(ctx, layer.w1, ggml_mul_mat(ctx, llayer.w1_a, llayer.w1_b));
         struct ggml_tensor * w2 = ggml_add(ctx, layer.w2, ggml_mul_mat(ctx, llayer.w2_a, llayer.w2_b));
         struct ggml_tensor * w3 = ggml_add(ctx, layer.w3, ggml_mul_mat(ctx, llayer.w3_a, llayer.w3_b));
@@ -1878,7 +1875,7 @@ void save_checkpoint(struct my_llama_model * model, struct my_llama_lora * lora,
     write_opt_context(&file, opt);
 }
 
-bool load_checkpoint(struct my_llama_lora * model, struct my_llama_lora * lora, struct ggml_opt_context * opt, const char * filename, bool init) {
+bool load_checkpoint(struct my_llama_model * model, struct my_llama_lora * lora, struct ggml_opt_context * opt, const char * filename, bool init) {
    struct llama_file file(filename, "rb");
 
    uint32_t magic;
@@ -1971,7 +1968,7 @@ bool load_checkpoint(struct my_llama_model * model, struct my_llama_lora * lora,
             read_tensor(&file, layer.w3_b);
         }
 
-        read_opt_context(&file, model->ctx, opt);
+        read_opt_context(&file, lora->ctx, opt);
     }
 
     return (file.fp != NULL);
@@ -2638,6 +2635,7 @@ int main(int argc, char ** argv) {
     struct llama_context_params llama_params = llama_context_default_params();
     llama_params.vocab_only = false;
 
+    printf("%s: model base = '%s'\n", __func__, params.fn_model_base);
     struct llama_model * lmodel = llama_load_model_from_file(params.fn_model_base, llama_params);
     struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
 
@@ -2670,7 +2668,9 @@ int main(int argc, char ** argv) {
     struct my_llama_model model;
     init_model(lmodel, &model, params.n_ctx);
 
-    struct my_llama_model lora;
+    struct my_llama_lora lora;
+    lora.hparams.lora_r = params.lora_r;
+    lora.hparams.lora_alpha = params.lora_alpha;
     lora.hparams.n_rank_attention_norm = params.n_rank_attention_norm;
     lora.hparams.n_rank_wq = params.n_rank_wq;
     lora.hparams.n_rank_wk = params.n_rank_wk;
@@ -2753,17 +2753,17 @@ int main(int argc, char ** argv) {
     opt->params = params.use_adam ? opt_params_adam : opt_params_lbfgs;
 
     printf("%s: init model\n", __func__);
-    bool existed = load_checkpoint(&model, opt, params.fn_checkpoint_in, true);
+    bool existed = load_checkpoint(&model, &lora, opt, params.fn_checkpoint_in, true);
 
     set_param_lora(&lora);
 
     opt->params = params.use_adam ? opt_params_adam : opt_params_lbfgs;
-    opt->iter = model.train_its;
+    opt->iter = lora.train_its;
     printf("%s: opt iter %d\n", __func__, opt->iter);
 
     bool from_scratch = !existed;
     if (from_scratch) {
-        randomize_model(&model, params.seed, 0.0f, 1.0f, -1.0f, +1.0f);
+        randomize_lora(&lora, params.seed, 0.0f, 1.0f, -1.0f, +1.0f);
     }
 
     init_kv_cache(&kv_self, &model, 1);
@@ -2894,9 +2894,9 @@ int main(int argc, char ** argv) {
         size_t used_mem_after_opt = ggml_used_mem(ctx0);
 
         int n_iter = params.use_adam ? params.adam_n_iter : params.lbfgs_n_iter;
-        model.train_its = opt->iter;
-        model.train_samples += n_batch * n_iter;
-        model.train_tokens += n_batch * n_tokens * n_iter;
+        lora.train_its = opt->iter;
+        lora.train_samples += n_batch * n_iter;
+        lora.train_tokens += n_batch * n_tokens * n_iter;
 
         if (params.print_info_interval > 0 && ex % params.print_info_interval == 0) {
             printf("Example %d, opt iter %d\n", ex, opt->iter);
@@ -2993,7 +2993,7 @@ int main(int argc, char ** argv) {
             struct ggml_cgraph * gf = ggml_new_graph(ctx0);
 
             int n_past = 0;
-            struct ggml_tensor * logits = forward(&model, &kv_self, ctx0, gf, tokens_input, sample_ctx, n_past);
+            struct ggml_tensor * logits = forward(&model, &lora, &kv_self, ctx0, gf, tokens_input, sample_ctx, n_past);
 
             ggml_build_forward_expand(gf, logits);
             ggml_graph_compute_helper(work_buffer, gf, params.n_threads);
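
For reference, a minimal sketch of the LoRA weight composition that the
hunks in llama_build_lora_finetune_graphs spell out per tensor. Only
ggml_add and ggml_mul_mat are taken from the patch itself; the helper
name lora_weight and the parameter names are illustrative assumptions,
not part of the patch:

    // Effective weight = frozen base weight + product of the two
    // low-rank LoRA factors, i.e. the same expression the patch writes
    // out per layer, e.g.:
    //   ggml_add(ctx, layer.wq, ggml_mul_mat(ctx, llayer.wq_a, llayer.wq_b))
    // (hypothetical helper, not present in the patch)
    static struct ggml_tensor * lora_weight(
            struct ggml_context * ctx,
            struct ggml_tensor  * base,    // frozen base weight from the model
            struct ggml_tensor  * lora_a,  // low-rank factor A (rank n_rank_*)
            struct ggml_tensor  * lora_b)  // low-rank factor B (rank n_rank_*)
    {
        return ggml_add(ctx, base, ggml_mul_mat(ctx, lora_a, lora_b));
    }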