bug fixes to make finetune compile

automatic allocator does not work yet
xaedes 2023-08-16 16:21:43 +02:00
parent 50b1e66200
commit be7e564b11

@@ -236,6 +236,8 @@ struct my_llama_model {
 };
 
 struct my_llama_lora_hparams {
+    uint32_t lora_r = 1;
+    uint32_t lora_alpha = 1;
     uint32_t n_rank_attention_norm = 1;
     uint32_t n_rank_wq = 4;
     uint32_t n_rank_wk = 4;
@@ -333,7 +335,7 @@ void print_lora_params(struct my_llama_lora_hparams * params) {
     printf("%s: n_rank_output : %u\n", __func__, params->n_rank_output);
 }
 
-void init_model(const struct llama_model * input, struct my_llama_model * model, uint32_t n_ctx) {
+void init_model(struct llama_model * input, struct my_llama_model * model, uint32_t n_ctx) {
     auto & hparams = model->hparams;
 
     hparams.n_vocab = llama_n_vocab_from_model(input);
@@ -350,10 +352,6 @@ void init_model(const struct llama_model * input, struct my_llama_model * model,
     const uint32_t n_ff = get_n_ff(&hparams);
 
-    model->train_its = 0;
-    model->train_samples = 0;
-    model->train_tokens = 0;
-
     model->tok_embeddings = llama_get_model_tok_embeddings(input);
     model->norm = llama_get_model_norm(input);
     model->output = llama_get_model_output(input);
@@ -589,7 +587,7 @@ struct ggml_tensor * forward(
     const int n_head = hparams.n_head;
     const int n_rot = hparams.n_rot;
 
-    GGML_ASSERT(n_layer == lora.layers.size());
+    GGML_ASSERT(n_layer == lora->layers.size());
 
     struct ggml_tensor * tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
     memcpy(tokens->data, tokens_input->data, N*ggml_element_size(tokens));
@@ -801,9 +799,7 @@ struct ggml_tensor * forward(
     // inpL shape [n_embd,N,1,1]
     inpL = ggml_mul(ctx0,
             ggml_repeat(ctx0,
-                ggml_add(ctx0,
-                    model->norm,
-                    lora->norm),
+                norm,
                 inpL),
             inpL);
@@ -1073,7 +1069,7 @@ struct ggml_tensor * llama_build_lora_finetune_graphs(
     const int n_ff = get_n_ff(&hparams);
     const int rope_mode = 0;
 
-    GGML_ASSERT(n_layer == lora.layers.size());
+    GGML_ASSERT(n_layer == lora->layers.size());
 
     auto set_name = [](struct ggml_tensor * t, const char * n) {
         ggml_set_name(t, n);
@@ -1117,6 +1113,7 @@ struct ggml_tensor * llama_build_lora_finetune_graphs(
         struct ggml_tensor * wq = ggml_add(ctx, layer.wq, ggml_mul_mat(ctx, llayer.wq_a, llayer.wq_b));
         struct ggml_tensor * wk = ggml_add(ctx, layer.wk, ggml_mul_mat(ctx, llayer.wk_a, llayer.wk_b));
         struct ggml_tensor * wv = ggml_add(ctx, layer.wv, ggml_mul_mat(ctx, llayer.wv_a, llayer.wv_b));
+        struct ggml_tensor * wo = ggml_add(ctx, layer.wo, ggml_mul_mat(ctx, llayer.wo_a, llayer.wo_b));
         struct ggml_tensor * w1 = ggml_add(ctx, layer.w1, ggml_mul_mat(ctx, llayer.w1_a, llayer.w1_b));
         struct ggml_tensor * w2 = ggml_add(ctx, layer.w2, ggml_mul_mat(ctx, llayer.w2_a, llayer.w2_b));
         struct ggml_tensor * w3 = ggml_add(ctx, layer.w3, ggml_mul_mat(ctx, llayer.w3_a, llayer.w3_b));
@@ -1878,7 +1875,7 @@ void save_checkpoint(struct my_llama_model * model, struct my_llama_lora * lora,
     write_opt_context(&file, opt);
 }
 
-bool load_checkpoint(struct my_llama_lora * model, struct my_llama_lora * lora, struct ggml_opt_context * opt, const char * filename, bool init) {
+bool load_checkpoint(struct my_llama_model * model, struct my_llama_lora * lora, struct ggml_opt_context * opt, const char * filename, bool init) {
     struct llama_file file(filename, "rb");
 
     uint32_t magic;
@@ -1971,7 +1968,7 @@ bool load_checkpoint(struct my_llama_lora * model, struct my_llama_lora * lora,
             read_tensor(&file, layer.w3_b);
         }
 
-        read_opt_context(&file, model->ctx, opt);
+        read_opt_context(&file, lora->ctx, opt);
     }
 
     return (file.fp != NULL);
@@ -2638,6 +2635,7 @@ int main(int argc, char ** argv) {
     struct llama_context_params llama_params = llama_context_default_params();
     llama_params.vocab_only = false;
 
+    printf("%s: model base = '%s'\n", __func__, params.fn_model_base);
     struct llama_model * lmodel = llama_load_model_from_file(params.fn_model_base, llama_params);
     struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
@@ -2670,7 +2668,9 @@ int main(int argc, char ** argv) {
     struct my_llama_model model;
     init_model(lmodel, &model, params.n_ctx);
 
-    struct my_llama_model lora;
+    struct my_llama_lora lora;
+    lora.hparams.lora_r = params.lora_r;
+    lora.hparams.lora_alpha = params.lora_alpha;
    lora.hparams.n_rank_attention_norm = params.n_rank_attention_norm;
     lora.hparams.n_rank_wq = params.n_rank_wq;
     lora.hparams.n_rank_wk = params.n_rank_wk;
@@ -2753,17 +2753,17 @@ int main(int argc, char ** argv) {
     opt->params = params.use_adam ? opt_params_adam : opt_params_lbfgs;
 
     printf("%s: init model\n", __func__);
-    bool existed = load_checkpoint(&model, opt, params.fn_checkpoint_in, true);
+    bool existed = load_checkpoint(&model, &lora, opt, params.fn_checkpoint_in, true);
     set_param_lora(&lora);
 
     opt->params = params.use_adam ? opt_params_adam : opt_params_lbfgs;
 
-    opt->iter = model.train_its;
+    opt->iter = lora.train_its;
     printf("%s: opt iter %d\n", __func__, opt->iter);
 
     bool from_scratch = !existed;
     if (from_scratch) {
-        randomize_model(&model, params.seed, 0.0f, 1.0f, -1.0f, +1.0f);
+        randomize_lora(&lora, params.seed, 0.0f, 1.0f, -1.0f, +1.0f);
     }
 
     init_kv_cache(&kv_self, &model, 1);
@@ -2894,9 +2894,9 @@ int main(int argc, char ** argv) {
         size_t used_mem_after_opt = ggml_used_mem(ctx0);
 
         int n_iter = params.use_adam ? params.adam_n_iter : params.lbfgs_n_iter;
-        model.train_its = opt->iter;
-        model.train_samples += n_batch * n_iter;
-        model.train_tokens += n_batch * n_tokens * n_iter;
+        lora.train_its = opt->iter;
+        lora.train_samples += n_batch * n_iter;
+        lora.train_tokens += n_batch * n_tokens * n_iter;
 
         if (params.print_info_interval > 0 && ex % params.print_info_interval == 0) {
             printf("Example %d, opt iter %d\n", ex, opt->iter);
@@ -2993,7 +2993,7 @@ int main(int argc, char ** argv) {
            struct ggml_cgraph * gf = ggml_new_graph(ctx0);
 
            int n_past = 0;
-            struct ggml_tensor * logits = forward(&model, &kv_self, ctx0, gf, tokens_input, sample_ctx, n_past);
+            struct ggml_tensor * logits = forward(&model, &lora, &kv_self, ctx0, gf, tokens_input, sample_ctx, n_past);
 
            ggml_build_forward_expand(gf, logits);
            ggml_graph_compute_helper(work_buffer, gf, params.n_threads);
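
For reference, the wq/wk/wv/wo hunk above builds each effective weight as the frozen base matrix plus the product of its two low-rank LoRA factors, using ggml_mul_mat and ggml_add. Below is a minimal standalone sketch of that pattern; it is not part of this commit, and the tensor names (w, w_a, w_b) and toy sizes are made up for illustration.

// Minimal sketch, not part of this commit: the W + A*B LoRA composition
// used by the wq/wk/wv/wo hunk above, with toy sizes.
#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    const int n_embd = 8; // toy embedding size
    const int n_rank = 2; // toy LoRA rank (lora_r)

    // frozen base weight: [n_embd, n_embd]
    struct ggml_tensor * w   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd);
    // low-rank factors: [n_rank, n_embd] each, so ggml_mul_mat(w_a, w_b) has w's shape
    struct ggml_tensor * w_a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_rank, n_embd);
    struct ggml_tensor * w_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_rank, n_embd);

    // effective weight, same pattern as the diff:
    // ggml_add(ctx, layer.wq, ggml_mul_mat(ctx, llayer.wq_a, llayer.wq_b))
    struct ggml_tensor * w_eff = ggml_add(ctx, w, ggml_mul_mat(ctx, w_a, w_b));

    // build the forward graph for the composed weight
    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, w_eff);
    printf("effective weight shape: [%d, %d], graph nodes: %d\n",
           (int) w_eff->ne[0], (int) w_eff->ne[1], gf->n_nodes);

    ggml_free(ctx);
    return 0;
}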