diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index df48da877..d2451bdca 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -791,57 +791,57 @@ struct ggml_tensor * llama_build_lora_finetune_graphs(
         ggml_build_backward_expand(ctx, gf, gb, true);
     }
 
-    if (alloc) {
-        // make sure some tensors are not reallocated by inserting new temporary nodes depending on them
-        int n_leafs_before = gb->n_leafs;
-        int n_nodes_before = gb->n_nodes;
-        struct ggml_tensor * one = ggml_new_f32(ctx, 1.0f);
-        // output tensors
-        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t35, one));
-        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36, one));
-        // input gradient
-        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36->grad, one));
-        GGML_ASSERT(t36->grad->data == NULL && t36->grad->view_src == NULL);
-        ggml_allocr_alloc(alloc, t36->grad);
+    GGML_ASSERT(alloc != NULL);
 
-        // make sure base model tensors data cannot be used in viewable operations
-        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, model->tok_embeddings, one));
-        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, model->norm, one));
-        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, model->output, one));
-        for (int il = 0; il < n_layer; ++il) {
-            struct my_llama_layer & layer = model->layers[il];
-            ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.attention_norm, one));
-            ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.ffn_norm, one));
-            ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wq, one));
-            ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wk, one));
-            ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wv, one));
-            ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wo, one));
-            ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.w1, one));
-            ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.w2, one));
-            ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.w3, one));
-        }
+    // make sure some tensors are not reallocated by inserting new temporary nodes depending on them
+    int n_leafs_before = gb->n_leafs;
+    int n_nodes_before = gb->n_nodes;
+    struct ggml_tensor * one = ggml_new_f32(ctx, 1.0f);
+    // output tensors
+    ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t35, one));
+    ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36, one));
+    // input gradient
+    ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36->grad, one));
+    GGML_ASSERT(t36->grad->data == NULL && t36->grad->view_src == NULL);
+    ggml_allocr_alloc(alloc, t36->grad);
 
-        // allocating checkpoints in one block to reduce memory fragmentation
-        // note: they will be freed in reverse order
-        for (unsigned int i = 0; i < checkpoints.size(); ++i) {
-            if (checkpoints[i]->data == NULL && checkpoints[i]->view_src == NULL) {
-                ggml_allocr_alloc(alloc, checkpoints[i]);
-            }
-        }
-
-        ggml_allocr_alloc_graph(alloc, gb);
-
-        // remove the additional nodes and leafs
-        for (int i = n_leafs_before; i < gb->n_leafs; ++i) {
-            gb->leafs[i] = NULL;
-        }
-        for (int i = n_nodes_before; i < gb->n_nodes; ++i) {
-            gb->nodes[i] = NULL;
-        }
-        gb->n_leafs = n_leafs_before;
-        gb->n_nodes = n_nodes_before;
+    // make sure base model tensors data cannot be used in viewable operations
+    ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, model->tok_embeddings, one));
+    ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, model->norm, one));
+    ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, model->output, one));
+    for (int il = 0; il < n_layer; ++il) {
+        struct my_llama_layer & layer = model->layers[il];
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.attention_norm, one));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.ffn_norm, one));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wq, one));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wk, one));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wv, one));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wo, one));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.w1, one));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.w2, one));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.w3, one));
     }
+    // allocating checkpoints in one block to reduce memory fragmentation
+    // note: they will be freed in reverse order
+    for (unsigned int i = 0; i < checkpoints.size(); ++i) {
+        if (checkpoints[i]->data == NULL && checkpoints[i]->view_src == NULL) {
+            ggml_allocr_alloc(alloc, checkpoints[i]);
+        }
+    }
+
+    ggml_allocr_alloc_graph(alloc, gb);
+
+    // remove the additional nodes and leafs
+    for (int i = n_leafs_before; i < gb->n_leafs; ++i) {
+        gb->leafs[i] = NULL;
+    }
+    for (int i = n_nodes_before; i < gb->n_nodes; ++i) {
+        gb->nodes[i] = NULL;
+    }
+    gb->n_leafs = n_leafs_before;
+    gb->n_nodes = n_nodes_before;
+
     *logits = t35;
     return t36;
 }
@@ -1596,7 +1596,6 @@ struct train_params {
     bool use_adam;
     bool use_flash;
     bool use_checkpointing;
-    bool use_alloc;
 
     // only adam
     int warmup;
@@ -1670,7 +1669,6 @@ struct train_params get_default_train_params() {
    params.use_adam          = true;
    params.use_flash         = true;
    params.use_checkpointing = true;
-    params.use_alloc         = true;
 
    params.opt_past  = 0;
    params.opt_delta = 1e-5f;
@@ -1982,10 +1980,6 @@ bool train_params_parse(int argc, char ** argv, struct train_params * params) {
            params->use_checkpointing = false;
        } else if (arg == "--use-checkpointing") {
            params->use_checkpointing = true;
-        } else if (arg == "--no-alloc") {
-            params->use_alloc = false;
-        } else if (arg == "--use-alloc") {
-            params->use_alloc = true;
        } else if (arg == "--warmup") {
            if (++i >= argc) {
                invalid_param = true;
@@ -2346,11 +2340,8 @@ int main(int argc, char ** argv) {
    size_t    size_buf_0 = 1024ll*1024ll*1024ll*((size_t) params.mem_compute0_gb);
    uint8_t * compute_buf_0 = new uint8_t[size_buf_0];
 
-    ggml_allocr * alloc = NULL;
-    if (params.use_alloc) {
-        static const size_t tensor_alignment = 32;
-        alloc = ggml_allocr_new(compute_buf_0, size_buf_0, tensor_alignment);
-    }
+    static const size_t tensor_alignment = 32;
+    ggml_allocr * alloc = ggml_allocr_new(compute_buf_0, size_buf_0, tensor_alignment);
 
    std::vector train_samples;
    if (params.n_examples > 0) {
@@ -2405,15 +2396,13 @@ int main(int argc, char ** argv) {
        ggml_set_no_alloc(ctx0, false);
 
        // don't use alloc for input tensors, so we can safely fill them with data
-        struct ggml_tensor * tokens_input  = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
-        struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
-        struct ggml_tensor * target_probs  = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
+        struct ggml_tensor * tokens_input  = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
+        struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
+        struct ggml_tensor * target_probs  = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
 
-        ggml_set_no_alloc(ctx0, (alloc != NULL));
+        ggml_set_no_alloc(ctx0, true);
 
-        if (alloc) {
-            ggml_allocr_reset(alloc);
-        }
+        ggml_allocr_reset(alloc);
 
        opt_cb_data.tokens_input  = tokens_input;
        opt_cb_data.target_logits = target_logits;
@@ -2461,7 +2450,6 @@ int main(int argc, char ** argv) {
 
        size_t used_mem_after_opt = ggml_used_mem(ctx0);
 
-
        if (params.print_info_interval > 0 && ex % params.print_info_interval == 0) {
            printf("Example %d, opt iter %d\n", ex, opt->iter);
            printf("error_before_opt: %.6f\n", opt->loss_before);
@@ -2495,10 +2483,7 @@ int main(int argc, char ** argv) {
 
    opt_cb_data.last_save_iter = opt->iter;
 
-    if (alloc) {
-        ggml_allocr_free(alloc);
-    }
-
+    ggml_allocr_free(alloc);
    delete[] compute_addr;
    delete[] compute_buf_0;
    ggml_free(lora.ctx);
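
Note (not part of the patch): a minimal sketch of the ggml_allocr lifecycle that the code above now relies on unconditionally. It only uses calls that already appear in the diff (ggml_allocr_new, ggml_allocr_reset, ggml_allocr_free); the buffer size is illustrative, and the graph-building and allocation steps (gb, t36->grad) are elided as comments rather than implemented.

    #include "ggml.h"
    #include "ggml-alloc.h"

    #include <cstddef>
    #include <cstdint>

    int main() {
        // fixed compute buffer handed to the allocator (size is illustrative, not taken from finetune.cpp)
        const size_t size_buf_0 = 1024ull*1024ull*1024ull;
        uint8_t * compute_buf_0 = new uint8_t[size_buf_0];

        // the allocator is now created unconditionally, mirroring the change in main()
        static const size_t tensor_alignment = 32;
        ggml_allocr * alloc = ggml_allocr_new(compute_buf_0, size_buf_0, tensor_alignment);

        // per training example: reset, then place the graph's tensors inside the buffer
        ggml_allocr_reset(alloc);
        // ggml_allocr_alloc(alloc, t36->grad);   // pin individual tensors first (as in the patch)
        // ggml_allocr_alloc_graph(alloc, gb);    // then allocate the remaining graph tensors

        // teardown order matches the end of main(): free the allocator, then the buffer
        ggml_allocr_free(alloc);
        delete[] compute_buf_0;
        return 0;
    }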