remove finetune option to disable allocator
the allocator should always be used. by making sure that it is always used, it becomes easier to implement automatic computation of memory requirements
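This change only removes the fallback path; the automatic sizing itself is not part of the commit. A rough sketch of the two-pass flow that an always-present allocator enables is shown below. It is an illustration under assumptions: it presumes the ggml-alloc API of this period (ggml_allocr_new_measure, ggml_allocr_alloc_graph, ggml_allocr_new, ggml_allocr_free), and build_train_graph is a hypothetical stand-in for the finetune graph construction, not a function in the tree.

    // sketch only: a measure pass computes the worst-case compute buffer size,
    // a second pass creates the real allocator over a buffer of exactly that size
    #include <cstdint>
    #include "ggml.h"
    #include "ggml-alloc.h"

    // hypothetical stand-in: builds the finetune graphs into a fresh no_alloc
    // context and lets `alloc` place the tensor data
    extern ggml_cgraph * build_train_graph(ggml_allocr * alloc);

    ggml_allocr * create_compute_allocator(uint8_t ** out_buf) {
        static const size_t tensor_alignment = 32;

        // pass 1: a measure allocator has no real buffer; allocating the graph
        // against it only records how much memory would be needed
        ggml_allocr * measure = ggml_allocr_new_measure(tensor_alignment);
        size_t required = ggml_allocr_alloc_graph(measure, build_train_graph(measure)) + tensor_alignment;
        ggml_allocr_free(measure);

        // pass 2: allocate exactly the measured size and return a real allocator over it
        *out_buf = new uint8_t[required];
        return ggml_allocr_new(*out_buf, required, tensor_alignment);
    }

With an optional allocator, every such step would need a second, allocator-free code path; removing that option is what this commit does.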
parent 4fd51c4616
commit e0da1684db
1 changed file with 55 additions and 70 deletions
@@ -791,7 +791,8 @@ struct ggml_tensor * llama_build_lora_finetune_graphs(
         ggml_build_backward_expand(ctx, gf, gb, true);
     }
 
-    if (alloc) {
+    GGML_ASSERT(alloc != NULL);
+
         // make sure some tensors are not reallocated by inserting new temporary nodes depending on them
         int n_leafs_before = gb->n_leafs;
         int n_nodes_before = gb->n_nodes;
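GGML_ASSERT is ggml's hard assertion macro, so a NULL allocator now aborts at graph-build time instead of silently taking a different code path. Roughly, such a check behaves like the simplified sketch below (an illustration, not the exact definition from ggml.h):

    #include <cstdio>
    #include <cstdlib>

    // simplified illustration of a GGML_ASSERT-style check: report the failed
    // expression with file and line, then abort the process
    #define ASSERT_LIKE_GGML(x)                                                            \
        do {                                                                               \
            if (!(x)) {                                                                    \
                fprintf(stderr, "assert failed: %s:%d: %s\n", __FILE__, __LINE__, #x);     \
                abort();                                                                   \
            }                                                                              \
        } while (0)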
@@ -840,7 +841,6 @@ struct ggml_tensor * llama_build_lora_finetune_graphs(
         }
         gb->n_leafs = n_leafs_before;
         gb->n_nodes = n_nodes_before;
-    }
 
     *logits = t35;
     return t36;
@@ -1596,7 +1596,6 @@ struct train_params {
     bool use_adam;
     bool use_flash;
     bool use_checkpointing;
-    bool use_alloc;
 
     // only adam
     int warmup;
@@ -1670,7 +1669,6 @@ struct train_params get_default_train_params() {
     params.use_adam = true;
     params.use_flash = true;
     params.use_checkpointing = true;
-    params.use_alloc = true;
 
     params.opt_past = 0;
     params.opt_delta = 1e-5f;
@@ -1982,10 +1980,6 @@ bool train_params_parse(int argc, char ** argv, struct train_params * params) {
             params->use_checkpointing = false;
         } else if (arg == "--use-checkpointing") {
             params->use_checkpointing = true;
-        } else if (arg == "--no-alloc") {
-            params->use_alloc = false;
-        } else if (arg == "--use-alloc") {
-            params->use_alloc = true;
         } else if (arg == "--warmup") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -2346,11 +2340,8 @@ int main(int argc, char ** argv) {
     size_t size_buf_0 = 1024ll*1024ll*1024ll*((size_t) params.mem_compute0_gb);
     uint8_t * compute_buf_0 = new uint8_t[size_buf_0];
 
-    ggml_allocr * alloc = NULL;
-    if (params.use_alloc) {
     static const size_t tensor_alignment = 32;
-    alloc = ggml_allocr_new(compute_buf_0, size_buf_0, tensor_alignment);
-    }
+    ggml_allocr * alloc = ggml_allocr_new(compute_buf_0, size_buf_0, tensor_alignment);
 
     std::vector<int> train_samples;
     if (params.n_examples > 0) {
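With the flag gone, the allocator's lifetime is tied directly to the fixed compute buffer. A minimal sketch of that pairing, using the same calls as the diff (the buffer size here is illustrative rather than taken from the parameters):

    #include <cstdint>
    #include "ggml-alloc.h"

    int main() {
        // fixed compute buffer, in the real code sized from mem_compute0_gb
        const size_t size_buf_0 = 1024ull*1024ull*1024ull;   // 1 GiB, illustrative
        uint8_t * compute_buf_0 = new uint8_t[size_buf_0];

        static const size_t tensor_alignment = 32;
        ggml_allocr * alloc = ggml_allocr_new(compute_buf_0, size_buf_0, tensor_alignment);

        // ... build graphs and run training; the allocator is reset once per example ...

        ggml_allocr_free(alloc);   // release the allocator before freeing its buffer
        delete[] compute_buf_0;
        return 0;
    }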
@@ -2409,11 +2400,9 @@ int main(int argc, char ** argv) {
         struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
         struct ggml_tensor * target_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
 
-        ggml_set_no_alloc(ctx0, (alloc != NULL));
+        ggml_set_no_alloc(ctx0, true);
 
-        if (alloc) {
         ggml_allocr_reset(alloc);
-        }
 
         opt_cb_data.tokens_input = tokens_input;
         opt_cb_data.target_logits = target_logits;
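Since the allocator is always present, the per-example context is unconditionally put into no_alloc mode and the allocator is reset before each graph build. A sketch of that per-example step, assuming the same API as the diff; buf and buf_size stand for a hypothetical metadata arena:

    #include "ggml.h"
    #include "ggml-alloc.h"

    // sketch of the per-example step: tensor metadata lives in ctx0, tensor data is
    // placed into the shared compute buffer by `alloc`
    void train_one_example(void * buf, size_t buf_size, ggml_allocr * alloc) {
        struct ggml_init_params cparams = { buf_size, buf, /*no_alloc =*/ true };
        struct ggml_context * ctx0 = ggml_init(cparams);
        ggml_set_no_alloc(ctx0, true);   // data pointers come from the allocator, not ctx0

        ggml_allocr_reset(alloc);        // reuse the same compute buffer for every example

        // ... create input/target tensors, build the forward/backward graphs, and let
        //     ggml_allocr_alloc_graph(alloc, gb) place all tensor data ...

        ggml_free(ctx0);
    }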
@@ -2461,7 +2450,6 @@ int main(int argc, char ** argv) {
 
         size_t used_mem_after_opt = ggml_used_mem(ctx0);
 
-
         if (params.print_info_interval > 0 && ex % params.print_info_interval == 0) {
             printf("Example %d, opt iter %d\n", ex, opt->iter);
             printf("error_before_opt: %.6f\n", opt->loss_before);
@@ -2495,10 +2483,7 @@ int main(int argc, char ** argv) {
 
     opt_cb_data.last_save_iter = opt->iter;
 
-    if (alloc) {
     ggml_allocr_free(alloc);
-    }
-
     delete[] compute_addr;
     delete[] compute_buf_0;
     ggml_free(lora.ctx);