remove finetune option to disable allocator
the allocator should always be used. by making sure that it is always used, it becomes easier to implement automatic computation of memory requirements
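This change only removes the fallback path; the automatic sizing itself is not part of the commit. A rough sketch of the two-pass flow that an always-present allocator enables is shown below. It is an illustration under assumptions: it presumes the ggml-alloc API of this period (ggml_allocr_new_measure, ggml_allocr_alloc_graph, ggml_allocr_new, ggml_allocr_free), and build_train_graph is a hypothetical stand-in for the finetune graph construction, not a function in the tree.

    // sketch only: a measure pass computes the worst-case compute buffer size,
    // a second pass creates the real allocator over a buffer of exactly that size
    #include <cstdint>
    #include "ggml.h"
    #include "ggml-alloc.h"

    // hypothetical stand-in: builds the finetune graphs into a fresh no_alloc
    // context and lets `alloc` place the tensor data
    extern ggml_cgraph * build_train_graph(ggml_allocr * alloc);

    ggml_allocr * create_compute_allocator(uint8_t ** out_buf) {
        static const size_t tensor_alignment = 32;

        // pass 1: a measure allocator has no real buffer; allocating the graph
        // against it only records how much memory would be needed
        ggml_allocr * measure = ggml_allocr_new_measure(tensor_alignment);
        size_t required = ggml_allocr_alloc_graph(measure, build_train_graph(measure)) + tensor_alignment;
        ggml_allocr_free(measure);

        // pass 2: allocate exactly the measured size and return a real allocator over it
        *out_buf = new uint8_t[required];
        return ggml_allocr_new(*out_buf, required, tensor_alignment);
    }

With an optional allocator, every such step would need a second, allocator-free code path; removing that option is what this commit does.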
parent 4fd51c4616
commit e0da1684db
1 changed file with 55 additions and 70 deletions
@@ -791,7 +791,8 @@ struct ggml_tensor * llama_build_lora_finetune_graphs(
         ggml_build_backward_expand(ctx, gf, gb, true);
     }
 
-    if (alloc) {
+    GGML_ASSERT(alloc != NULL);
+
         // make sure some tensors are not reallocated by inserting new temporary nodes depending on them
         int n_leafs_before = gb->n_leafs;
         int n_nodes_before = gb->n_nodes;
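GGML_ASSERT is ggml's hard assertion macro, so a NULL allocator now aborts at graph-build time instead of silently taking a different code path. Roughly, such a check behaves like the simplified sketch below (an illustration, not the exact definition from ggml.h):

    #include <cstdio>
    #include <cstdlib>

    // simplified illustration of a GGML_ASSERT-style check: report the failed
    // expression with file and line, then abort the process
    #define ASSERT_LIKE_GGML(x)                                                            \
        do {                                                                               \
            if (!(x)) {                                                                    \
                fprintf(stderr, "assert failed: %s:%d: %s\n", __FILE__, __LINE__, #x);     \
                abort();                                                                   \
            }                                                                              \
        } while (0)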
@@ -840,7 +841,6 @@ struct ggml_tensor * llama_build_lora_finetune_graphs(
         }
         gb->n_leafs = n_leafs_before;
         gb->n_nodes = n_nodes_before;
-    }
 
     *logits = t35;
     return t36;
@@ -1596,7 +1596,6 @@ struct train_params {
     bool use_adam;
     bool use_flash;
     bool use_checkpointing;
-    bool use_alloc;
 
     // only adam
     int warmup;
@@ -1670,7 +1669,6 @@ struct train_params get_default_train_params() {
     params.use_adam = true;
     params.use_flash = true;
     params.use_checkpointing = true;
-    params.use_alloc = true;
 
     params.opt_past = 0;
     params.opt_delta = 1e-5f;
@@ -1982,10 +1980,6 @@ bool train_params_parse(int argc, char ** argv, struct train_params * params) {
             params->use_checkpointing = false;
         } else if (arg == "--use-checkpointing") {
             params->use_checkpointing = true;
-        } else if (arg == "--no-alloc") {
-            params->use_alloc = false;
-        } else if (arg == "--use-alloc") {
-            params->use_alloc = true;
         } else if (arg == "--warmup") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -2346,11 +2340,8 @@ int main(int argc, char ** argv) {
     size_t size_buf_0 = 1024ll*1024ll*1024ll*((size_t) params.mem_compute0_gb);
     uint8_t * compute_buf_0 = new uint8_t[size_buf_0];
 
-    ggml_allocr * alloc = NULL;
-    if (params.use_alloc) {
     static const size_t tensor_alignment = 32;
-    alloc = ggml_allocr_new(compute_buf_0, size_buf_0, tensor_alignment);
-    }
+    ggml_allocr * alloc = ggml_allocr_new(compute_buf_0, size_buf_0, tensor_alignment);
 
     std::vector<int> train_samples;
     if (params.n_examples > 0) {
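With the flag gone, the allocator's lifetime is tied directly to the fixed compute buffer. A minimal sketch of that pairing, using the same calls as the diff (the buffer size here is illustrative rather than taken from the parameters):

    #include <cstdint>
    #include "ggml-alloc.h"

    int main() {
        // fixed compute buffer, in the real code sized from mem_compute0_gb
        const size_t size_buf_0 = 1024ull*1024ull*1024ull;   // 1 GiB, illustrative
        uint8_t * compute_buf_0 = new uint8_t[size_buf_0];

        static const size_t tensor_alignment = 32;
        ggml_allocr * alloc = ggml_allocr_new(compute_buf_0, size_buf_0, tensor_alignment);

        // ... build graphs and run training; the allocator is reset once per example ...

        ggml_allocr_free(alloc);   // release the allocator before freeing its buffer
        delete[] compute_buf_0;
        return 0;
    }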
@@ -2409,11 +2400,9 @@ int main(int argc, char ** argv) {
         struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
         struct ggml_tensor * target_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
 
-        ggml_set_no_alloc(ctx0, (alloc != NULL));
+        ggml_set_no_alloc(ctx0, true);
 
-        if (alloc) {
         ggml_allocr_reset(alloc);
-        }
 
         opt_cb_data.tokens_input = tokens_input;
         opt_cb_data.target_logits = target_logits;
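Since the allocator is always present, the per-example context is unconditionally put into no_alloc mode and the allocator is reset before each graph build. A sketch of that per-example step, assuming the same API as the diff; buf and buf_size stand for a hypothetical metadata arena:

    #include "ggml.h"
    #include "ggml-alloc.h"

    // sketch of the per-example step: tensor metadata lives in ctx0, tensor data is
    // placed into the shared compute buffer by `alloc`
    void train_one_example(void * buf, size_t buf_size, ggml_allocr * alloc) {
        struct ggml_init_params cparams = { buf_size, buf, /*no_alloc =*/ true };
        struct ggml_context * ctx0 = ggml_init(cparams);
        ggml_set_no_alloc(ctx0, true);   // data pointers come from the allocator, not ctx0

        ggml_allocr_reset(alloc);        // reuse the same compute buffer for every example

        // ... create input/target tensors, build the forward/backward graphs, and let
        //     ggml_allocr_alloc_graph(alloc, gb) place all tensor data ...

        ggml_free(ctx0);
    }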
@@ -2461,7 +2450,6 @@ int main(int argc, char ** argv) {
 
         size_t used_mem_after_opt = ggml_used_mem(ctx0);
 
-
         if (params.print_info_interval > 0 && ex % params.print_info_interval == 0) {
             printf("Example %d, opt iter %d\n", ex, opt->iter);
             printf("error_before_opt: %.6f\n", opt->loss_before);
@@ -2495,10 +2483,7 @@ int main(int argc, char ** argv) {
 
     opt_cb_data.last_save_iter = opt->iter;
 
-    if (alloc) {
     ggml_allocr_free(alloc);
-    }
-
     delete[] compute_addr;
     delete[] compute_buf_0;
     ggml_free(lora.ctx);