From 7df517c797872e5b235659046099b5a60daa64b4 Mon Sep 17 00:00:00 2001
From: xaedes
Date: Wed, 23 Aug 2023 20:08:48 +0200
Subject: [PATCH] update finetune README

---
 examples/finetune/README.md | 55 +++++++++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/examples/finetune/README.md b/examples/finetune/README.md
index 726ec47c0..ea17c38d9 100644
--- a/examples/finetune/README.md
+++ b/examples/finetune/README.md
@@ -1,4 +1,4 @@
-# train-text-from-scratch
+# finetune
 
 Basic usage instructions:
 
@@ -6,17 +6,50 @@ Basic usage instructions:
 # get training data
 wget https://raw.githubusercontent.com/brunoklein99/deep-learning-notes/master/shakespeare.txt
 
-# train
-./bin/train-text-from-scratch \
-        --vocab-model ../models/ggml-vocab.bin \
-        --ctx 64 --embd 256 --head 8 --layer 16 \
-        --checkpoint-in chk-shakespeare-256x16.bin \
-        --checkpoint-out chk-shakespeare-256x16.bin \
-        --model-out ggml-shakespeare-256x16-f32.bin \
+# finetune LORA adapter
+./bin/finetune \
+        --model-base open-llama-3b-v2-q8_0.bin \
+        --checkpoint-in chk-lora-open-llama-3b-v2-q8_0-shakespeare-LATEST.bin \
+        --checkpoint-out chk-lora-open-llama-3b-v2-q8_0-shakespeare-ITERATION.bin \
+        --model-out lora-open-llama-3b-v2-q8_0-shakespeare-ITERATION.bin \
         --train-data "shakespeare.txt" \
-        -t 6 -b 16 -n 32 --seed 1 --adam-iter 16 \
-        --print-details-interval 0 --predict 16 --use-flash
+        --save-every 10 \
+        --threads 6 --adam-iter 30 --batch 4 --ctx 64 \
+        --print-details-interval 0 --predict 0 \
+        --use-checkpointing --use-alloc \
+        --mem-lora 2 --mem-compute 1 --mem-compute0 20
 
 # predict
-./bin/main -m ggml-shakespeare-256x16-f32.bin
+./bin/main -m open-llama-3b-v2-q8_0.bin --lora lora-open-llama-3b-v2-q8_0-shakespeare-LATEST.bin
 ```
+
+Finetune output files are saved every N iterations (configurable with `--save-every N`).
+The pattern "ITERATION" in the output filenames is replaced with the iteration number, or with "LATEST" for the latest output.
+
+Gradient checkpointing reduces the memory requirements by ~50% but increases the runtime.
+If you have enough RAM, you can make finetuning a bit faster by disabling checkpointing with `--no-checkpointing`.
+
+To change the amount of memory used for finetuning with the memory allocator (`--use-alloc`, enabled by default), use `--mem-compute0 N` to specify the number of gigabytes.
+
+After training, text is generated using the trained LORA adapter.
+This text prediction is not optimized as well as it is in `main`.
+It may result in an out-of-memory crash; to disable text prediction after training, use `--predict 0`.
+
+The LORA rank can be configured for each model tensor type separately with these command-line options:
+
+```bash
+  --rank-att-norm N          LORA rank for attention norm tensor (default 1)
+  --rank-ffn-norm N          LORA rank for feed-forward norm tensor (default 1)
+  --rank-out-norm N          LORA rank for output norm tensor (default 1)
+  --rank-tok-embd N          LORA rank for token embeddings tensor (default 4)
+  --rank-out N               LORA rank for output tensor (default 4)
+  --rank-wq N                LORA rank for wq tensor (default 4)
+  --rank-wk N                LORA rank for wk tensor (default 4)
+  --rank-wv N                LORA rank for wv tensor (default 4)
+  --rank-wo N                LORA rank for wo tensor (default 4)
+  --rank-w1 N                LORA rank for w1 tensor (default 4)
+  --rank-w2 N                LORA rank for w2 tensor (default 4)
+  --rank-w3 N                LORA rank for w3 tensor (default 4)
+```
+
+To see all available options, use `finetune --help`.
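
As a concrete illustration of the ITERATION/LATEST naming described in the patched README above, here is a minimal sketch of the files one might expect after the example run (`--save-every 10`, `--adam-iter 30`). The listing is hypothetical, not captured from a real run; the exact set of files depends on the run.

```bash
# Hypothetical listing after the example finetune run above.
# Filenames follow the configured --checkpoint-out / --model-out
# patterns, with "ITERATION" replaced by the iteration number and
# "LATEST" used for the most recent save.
ls
# chk-lora-open-llama-3b-v2-q8_0-shakespeare-10.bin
# chk-lora-open-llama-3b-v2-q8_0-shakespeare-20.bin
# chk-lora-open-llama-3b-v2-q8_0-shakespeare-30.bin
# chk-lora-open-llama-3b-v2-q8_0-shakespeare-LATEST.bin
# lora-open-llama-3b-v2-q8_0-shakespeare-10.bin
# lora-open-llama-3b-v2-q8_0-shakespeare-20.bin
# lora-open-llama-3b-v2-q8_0-shakespeare-30.bin
# lora-open-llama-3b-v2-q8_0-shakespeare-LATEST.bin
```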
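The trade-off between `--use-checkpointing` and `--no-checkpointing` described in the README can be shown with a variant of the same command. This is a sketch only: the flags are copied from the README above, and the `--mem-compute0 32` value is an arbitrary example of giving the allocator more gigabytes, not a recommendation.

```bash
# Sketch: finetune without gradient checkpointing (faster, but needs
# roughly twice the memory), compensating with a larger compute buffer
# for the allocator (--mem-compute0, in gigabytes; value illustrative).
./bin/finetune \
        --model-base open-llama-3b-v2-q8_0.bin \
        --checkpoint-in chk-lora-open-llama-3b-v2-q8_0-shakespeare-LATEST.bin \
        --checkpoint-out chk-lora-open-llama-3b-v2-q8_0-shakespeare-ITERATION.bin \
        --model-out lora-open-llama-3b-v2-q8_0-shakespeare-ITERATION.bin \
        --train-data "shakespeare.txt" \
        --save-every 10 \
        --threads 6 --adam-iter 30 --batch 4 --ctx 64 \
        --print-details-interval 0 --predict 0 \
        --no-checkpointing --use-alloc \
        --mem-lora 2 --mem-compute 1 --mem-compute0 32
```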
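The per-tensor rank options listed at the end of the README combine with the same invocation. A minimal sketch, assuming a uniform rank of 8 for the attention and feed-forward weight tensors; the value is arbitrary, and higher ranks give the adapter more trainable parameters and use more memory.

```bash
# Illustrative: the finetune invocation from the README above with the
# default rank of 4 raised to 8 for the attention and feed-forward
# weight tensors (rank value chosen arbitrarily for the example).
./bin/finetune \
        --model-base open-llama-3b-v2-q8_0.bin \
        --checkpoint-in chk-lora-open-llama-3b-v2-q8_0-shakespeare-LATEST.bin \
        --checkpoint-out chk-lora-open-llama-3b-v2-q8_0-shakespeare-ITERATION.bin \
        --model-out lora-open-llama-3b-v2-q8_0-shakespeare-ITERATION.bin \
        --train-data "shakespeare.txt" \
        --threads 6 --adam-iter 30 --batch 4 --ctx 64 \
        --rank-wq 8 --rank-wk 8 --rank-wv 8 --rank-wo 8 \
        --rank-w1 8 --rank-w2 8 --rank-w3 8
```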