train-text-from-scratch can train (full finetune) GGUF models
Just pass the GGUF model via `--checkpoint-in FN`. After this, to continue training, pass the generated checkpoint instead of the original GGUF model. Tested with smaller models; bigger models may exceed available memory — use the LoRA finetune example for those.
This commit is contained in:
parent
e6b7158123
commit
fc456edda6
1 changed file with 14 additions and 10 deletions
|
@ -1188,7 +1188,8 @@ void save_llama_model_file(const char * filename, const char * fn_vocab_model, s
|
|||
void load_checkpoint_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model, struct ggml_opt_context * opt) {
|
||||
load_llama_model_gguf(fctx, f_ggml_ctx, model);
|
||||
|
||||
uint32_t file_version;
|
||||
if (gguf_find_key(fctx, LLM_KV_TRAINING_FILE_VERSION) >= 0) {
|
||||
uint32_t file_version = 0xFFFFFFFFu;
|
||||
GGUF_GET_KEY(fctx, file_version, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_FILE_VERSION);
|
||||
GGML_ASSERT(file_version == 0);
|
||||
|
||||
|
@ -1201,6 +1202,9 @@ void load_checkpoint_gguf(struct gguf_context * fctx, struct ggml_context * f_gg
|
|||
GGUF_GET_KEY(fctx, model->train_tokens, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_TOKEN_COUNT);
|
||||
|
||||
load_opt_context_gguf(fctx, f_ggml_ctx, opt);
|
||||
} else {
|
||||
printf("%s: loaded llama model as checkpoint\n", __func__);
|
||||
}
|
||||
}
|
||||
|
||||
void save_checkpoint_gguf(struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model, struct ggml_opt_context * opt) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue