From 6a20f7a2f0ef884e9092552d6998ff741c488957 Mon Sep 17 00:00:00 2001
From: xaedes
Date: Fri, 25 Aug 2023 22:32:39 +0200
Subject: [PATCH] bug fixes

---
 .../train-text-from-scratch.cpp               | 77 +++++++++++++------
 1 file changed, 53 insertions(+), 24 deletions(-)

diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index e4304126d..9fed3dd35 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -235,7 +235,6 @@ struct my_llama_model {
     uint32_t train_tokens = 0;
 };
 
-
 // gguf constants
 const char * LLM_KV_OPTIMIZER_TYPE = "optimizer.type";
 const char * LLM_KV_OPTIMIZER_TYPE_ADAM = "adam";
@@ -1280,13 +1279,43 @@ static std::string format(const char * fmt, ...) {
 }
 
 int tokenize_file(struct llama_context * lctx, const char * filename, std::vector<llama_token>& out) {
-    struct llama_file f(filename, "rb");
+    FILE * fp = std::fopen(filename, "rb");
+    if (fp == NULL) {
+        return 0;
+    }
+
+#ifdef _WIN32
+    GGML_ASSERT(_fseeki64(fp, (__int64) 0, SEEK_END) == 0);
+#else
+    GGML_ASSERT(std::fseek(fp, (long) 0, SEEK_END) == 0);
+#endif
+
+    size_t size = 0;
+#ifdef _WIN32
+    __int64 ret = _ftelli64(fp);
+    size = ret;
+#else
+    long ret = std::ftell(fp);
+    size = ret;
+#endif
+
+#ifdef _WIN32
+    GGML_ASSERT(_fseeki64(fp, (__int64) 0, SEEK_SET) == 0);
+#else
+    GGML_ASSERT(std::fseek(fp, (long) 0, SEEK_SET) == 0);
+#endif
 
     std::vector<char> buf;
-    buf.resize(f.size+1);
+    buf.resize(size+1);
 
-    f.read_raw(buf.data(), f.size);
-    buf[f.size] = '\0';
+    if (std::fread(buf.data(), size, 1, fp) != 1) {
+        throw std::runtime_error(std::string("unexpectedly reached end of file"));
+    }
+    if (ferror(fp)) {
+        throw std::runtime_error(format("read error: %s", strerror(errno)));
+    }
+
+    buf[size] = '\0';
 
     int n_tokens = llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
     if (n_tokens < 0) {
@@ -1482,7 +1511,7 @@ void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, co
     if (dst == NULL) {
         return;
     }
-    struct ggml_tensor * t = ggml_get_tensor(f_ggml_ctx, name);
+    struct ggml_tensor * t = ggml_get_tensor(ctx, name);
     GGML_ASSERT(are_same_layout(dst, t));
     memcpy(dst->data, t->data, ggml_nbytes(t));
 }
@@ -1503,7 +1532,7 @@ void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_g
 
     // don't call ggml_opt_init until optimizer type and optimizer specific parameters are know
     std::string opt_type;
-    GGUF_GET_KEY(fctx, opt_type, gguf_get_arr_str, GGUF_TYPE_STRING, true, LLM_KV_OPTIMIZER_TYPE);
+    GGUF_GET_KEY(fctx, opt_type, gguf_get_val_str, GGUF_TYPE_STRING, true, LLM_KV_OPTIMIZER_TYPE);
 
     if (opt_type == LLM_KV_OPTIMIZER_TYPE_ADAM) {
         opt->params.type = GGML_OPT_ADAM;
@@ -1514,9 +1543,9 @@ void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_g
         GGML_ASSERT(opt->ctx != NULL);
         ggml_opt_init(opt->ctx, opt, opt->params, opt->nx);
 
-        read_tensor_by_name(opt->adam.m, f_ggml_ctx, LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS)
-        read_tensor_by_name(opt->adam.v, f_ggml_ctx, LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS)
-        read_tensor_by_name(opt->adam.pf, f_ggml_ctx, LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS)
+        read_tensor_by_name(opt->adam.m, f_ggml_ctx, LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS);
+        read_tensor_by_name(opt->adam.v, f_ggml_ctx, LLM_TENSOR_OPTIMIZER_ADAM_SECOND_MOMENTS);
+        read_tensor_by_name(opt->adam.pf, f_ggml_ctx, LLM_TENSOR_OPTIMIZER_ADAM_PAST_LOSS_VALUES);
     } else if (opt_type == LLM_KV_OPTIMIZER_TYPE_LBFGS) {
         opt->params.type = GGML_OPT_LBFGS;
@@ -1569,7 +1598,7 @@ void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context *
         ggml_set_name(opt->adam.m, LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS);
         ggml_set_name(opt->adam.v, LLM_TENSOR_OPTIMIZER_ADAM_SECOND_MOMENTS);
         if (opt->adam.pf) {
-            ggml_set_name(pf, LLM_TENSOR_OPTIMIZER_ADAM_PAST_LOSS_VALUES);
+            ggml_set_name(opt->adam.pf, LLM_TENSOR_OPTIMIZER_ADAM_PAST_LOSS_VALUES);
         }
 
         gguf_add_tensor(fctx, opt->adam.m);
@@ -1595,7 +1624,7 @@ void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context *
         ggml_set_name(opt->lbfgs.gp, LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_GRADIENTS);
         ggml_set_name(opt->lbfgs.d, LLM_TENSOR_OPTIMIZER_LBFGS_SEARCH_DIRECTION);
         if (opt->lbfgs.pf) {
-            ggml_set_name(pf, LLM_TENSOR_OPTIMIZER_LBFGS_PAST_LOSS_VALUES);
+            ggml_set_name(opt->lbfgs.pf, LLM_TENSOR_OPTIMIZER_LBFGS_PAST_LOSS_VALUES);
         }
         ggml_set_name(opt->lbfgs.lmal, LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_ALPHA);
         ggml_set_name(opt->lbfgs.lmys, LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_YS);
@@ -1646,20 +1675,20 @@ void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_g
     GGML_ASSERT(arch == "llama");
 
     uint32_t ftype_u;
-    GGUF_GET_KEY(fctx, ftype_u, gguf_get_val_u32, GGUF_TYPE_U32, true, LLM_KV_GENERAL_FILE_TYPE);
+    GGUF_GET_KEY(fctx, ftype_u, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_GENERAL_FILE_TYPE);
     GGML_ASSERT((enum llama_ftype) ftype_u == LLAMA_FTYPE_ALL_F32);
 
-    GGUF_GET_KEY(fctx, model->hparams.n_ctx, gguf_get_val_u32, GGUF_TYPE_U32, true, kv(LLM_KV_CONTEXT_LENGTH));
-    GGUF_GET_KEY(fctx, model->hparams.n_embd, gguf_get_val_u32, GGUF_TYPE_U32, true, kv(LLM_KV_EMBEDDING_LENGTH));
-    GGUF_GET_KEY(fctx, model->hparams.n_ff, gguf_get_val_u32, GGUF_TYPE_U32, true, kv(LLM_KV_FEED_FORWARD_LENGTH));
-    GGUF_GET_KEY(fctx, model->hparams.n_head, gguf_get_val_u32, GGUF_TYPE_U32, true, kv(LLM_KV_ATTENTION_HEAD_COUNT));
-    GGUF_GET_KEY(fctx, model->hparams.n_layer, gguf_get_val_u32, GGUF_TYPE_U32, true, kv(LLM_KV_BLOCK_COUNT));
-    GGUF_GET_KEY(fctx, model->hparams.n_rot, gguf_get_val_u32, GGUF_TYPE_U32, true, kv(LLM_KV_ROPE_DIMENSION_COUNT));
+    GGUF_GET_KEY(fctx, model->hparams.n_ctx, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_CONTEXT_LENGTH));
+    GGUF_GET_KEY(fctx, model->hparams.n_embd, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_EMBEDDING_LENGTH));
+    GGUF_GET_KEY(fctx, model->hparams.n_ff, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_FEED_FORWARD_LENGTH));
+    GGUF_GET_KEY(fctx, model->hparams.n_head, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_ATTENTION_HEAD_COUNT));
+    GGUF_GET_KEY(fctx, model->hparams.n_layer, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_BLOCK_COUNT));
+    GGUF_GET_KEY(fctx, model->hparams.n_rot, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_ROPE_DIMENSION_COUNT));
 
     float rope_freq_scale;
-    GGUF_GET_KEY(fctx, model->hparams.f_norm_rms_eps, gguf_get_val_f32, GGUF_TYPE_F32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS));
-    GGUF_GET_KEY(fctx, model->hparams.rope_freq_base, gguf_get_val_f32, GGUF_TYPE_F32, true, kv(LLM_KV_ROPE_FREQ_BASE));
-    GGUF_GET_KEY(fctx, rope_freq_scale, gguf_get_val_f32, GGUF_TYPE_F32, true, kv(LLM_KV_ROPE_SCALE_LINEAR));
+    GGUF_GET_KEY(fctx, model->hparams.f_norm_rms_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS));
+    GGUF_GET_KEY(fctx, model->hparams.rope_freq_base, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ROPE_FREQ_BASE));
+    GGUF_GET_KEY(fctx, rope_freq_scale, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ROPE_SCALE_LINEAR));
     model->hparams.rope_freq_scale = 1.0f / rope_freq_scale;
 
     init_model(model);
@@ -1668,7 +1697,7 @@ void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_g
     read_tensor_by_name(model->norm, f_ggml_ctx, tn(LLM_TENSOR_OUTPUT_NORM));
     read_tensor_by_name(model->output, f_ggml_ctx, tn(LLM_TENSOR_OUTPUT));
 
-    for (uint32_t i = 0; i < n_layer; ++i) {
+    for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
         auto & layer = model->layers[i];
 
         read_tensor_by_name(layer.attention_norm, f_ggml_ctx, tni(LLM_TENSOR_ATTN_NORM, i));
@@ -2509,7 +2538,7 @@ int main(int argc, char ** argv) {
     opt->params = params.use_adam ? opt_params_adam : opt_params_lbfgs;
 
     printf("%s: init model\n", __func__);
-    bool existed = load_checkpoint(&model, opt, params.fn_checkpoint_in, true);
+    bool existed = load_checkpoint_file(params.fn_checkpoint_in, &model, opt);
     set_param_model(&model);
 
     opt->params = params.use_adam ? opt_params_adam : opt_params_lbfgs;
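
Note on the tokenize_file hunk above: the rewrite replaces the llama_file helper with plain stdio, using _fseeki64/_ftelli64 on Windows (where long is 32-bit, so fseek/ftell would truncate files over 2 GiB) and std::fseek/std::ftell elsewhere. A minimal standalone sketch of the same idiom; the helper name read_whole_file and the exception-based error handling are illustrative, not part of the patch:

    // Sketch: read a whole file into a NUL-terminated buffer, portably.
    #include <cerrno>
    #include <cstdio>
    #include <cstring>
    #include <stdexcept>
    #include <string>
    #include <vector>

    static std::vector<char> read_whole_file(const char * filename) {
        FILE * fp = std::fopen(filename, "rb");
        if (fp == NULL) {
            throw std::runtime_error(std::string("failed to open ") + filename);
        }
    #ifdef _WIN32
        // long is 32-bit on Windows, so the 64-bit seek/tell variants are needed
        _fseeki64(fp, 0, SEEK_END);
        size_t size = (size_t) _ftelli64(fp);
        _fseeki64(fp, 0, SEEK_SET);
    #else
        std::fseek(fp, 0, SEEK_END);
        size_t size = (size_t) std::ftell(fp);
        std::fseek(fp, 0, SEEK_SET);
    #endif
        std::vector<char> buf(size + 1);
        // fread returns the number of complete items read; we request one item
        // of `size` bytes, so anything other than 1 is a short read
        if (size > 0 && std::fread(buf.data(), size, 1, fp) != 1) {
            std::fclose(fp);
            throw std::runtime_error(std::string("read error: ") + std::strerror(errno));
        }
        buf[size] = '\0'; // callers may treat the buffer as a C string
        std::fclose(fp);
        return buf;
    }

One subtlety: std::fread(ptr, size, 1, fp) also returns 0 when size is 0, so the patched tokenize_file throws on an empty input file; the sketch sidesteps that by skipping the read when size == 0.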
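
Note on the tensor-name fixes (ggml_set_name in save_opt_context_gguf, read_tensor_by_name in load_opt_context_gguf): checkpoint round-tripping depends on each optimizer tensor being saved and looked up under the same name constant, and on the lookup happening in the context the checkpoint was loaded into (ctx, not the unrelated f_ggml_ctx the helper previously referenced). A toy sketch of that name-keyed round trip, assuming only ggml.h; the literal name string is illustrative, standing in for LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS:

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = { 16*1024*1024, NULL, false };
        struct ggml_context * ctx = ggml_init(params);

        // "save" side: tag the tensor with the name it is serialized under
        struct ggml_tensor * m = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        ggml_set_name(m, "optimizer.adam.first_moments");

        // "load" side: the lookup must use the same name and the same context
        struct ggml_tensor * t = ggml_get_tensor(ctx, "optimizer.adam.first_moments");
        GGML_ASSERT(t == m);

        ggml_free(ctx);
        return 0;
    }

This is also why reading opt->adam.v and opt->adam.pf back under LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS, as the pre-fix load code did, silently fills the second moments and past loss values with first-moment data.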
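
Note on the GGUF_GET_KEY hunks: the gguf_type enum in ggml.h spells scalar types out in full (GGUF_TYPE_UINT32, GGUF_TYPE_FLOAT32, GGUF_TYPE_STRING), so the abbreviated GGUF_TYPE_U32/GGUF_TYPE_F32 forms in the old lines do not compile, and a scalar string value such as optimizer.type must be read with gguf_get_val_str rather than the array accessor gguf_get_arr_str (which takes an extra element index). A hand-expanded sketch of roughly what one required-uint32 read through the macro amounts to; the helper name read_required_u32 is made up, while the gguf_* calls are the API from ggml.h:

    #include "ggml.h"
    #include <cstdint>
    #include <stdexcept>
    #include <string>

    static uint32_t read_required_u32(struct gguf_context * fctx, const std::string & key) {
        const int kid = gguf_find_key(fctx, key.c_str());
        if (kid < 0) {
            throw std::runtime_error("key not found in model: " + key);
        }
        if (gguf_get_kv_type(fctx, kid) != GGUF_TYPE_UINT32) {
            throw std::runtime_error("key has wrong type: " + key);
        }
        return gguf_get_val_u32(fctx, kid);
    }

    // usage, e.g.: model->hparams.n_ctx = read_required_u32(fctx, kv(LLM_KV_CONTEXT_LENGTH));

Also from the same hunk: the file stores the linear RoPE scale while the model keeps its reciprocal, hence model->hparams.rope_freq_scale = 1.0f / rope_freq_scale after the read.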