From 39a2d154610a743df8fec7a242e7d8abe8676071 Mon Sep 17 00:00:00 2001
From: xaedes <xaedes@gmail.com>
Date: Wed, 16 Aug 2023 16:42:25 +0200
Subject: [PATCH] avoid stack overflow resulting from big ggml_cgraph

replace stack allocation and ggml_build_forward by ggml_new_graph
in combination with ggml_build_forward_expand
---
 llama.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 92a787096..35ea68075 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3653,9 +3653,10 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
             ggml_set_name(r, "r_cpy");
         }
 
-        struct ggml_cgraph gf = ggml_build_forward(r);
+        struct ggml_cgraph * gf = ggml_new_graph(lora_ctx);
+        ggml_build_forward_expand(gf, r);
 
-        ggml_graph_compute_helper(work_buffer, &gf, n_threads);
+        ggml_graph_compute_helper(work_buffer, gf, n_threads);
 
         // we won't need these tensors again, reset the context to save memory
         ggml_free(lora_ctx);