Avoid stack overflow caused by a large stack-allocated ggml_cgraph

Replace the stack-allocated graph returned by ggml_build_forward with a graph created by ggml_new_graph and populated with ggml_build_forward_expand.
This commit is contained in:
xaedes 2023-08-16 16:42:25 +02:00
parent 0ab2507ce5
commit 39a2d15461
No known key found for this signature in database
GPG key ID: 30030EDD817EA2B1

View file

@@ -3653,9 +3653,10 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
ggml_set_name(r, "r_cpy"); ggml_set_name(r, "r_cpy");
} }
struct ggml_cgraph gf = ggml_build_forward(r); struct ggml_cgraph * gf = ggml_new_graph(lora_ctx);
ggml_build_forward_expand(gf, r);
ggml_graph_compute_helper(work_buffer, &gf, n_threads); ggml_graph_compute_helper(work_buffer, gf, n_threads);
// we won't need these tensors again, reset the context to save memory // we won't need these tensors again, reset the context to save memory
ggml_free(lora_ctx); ggml_free(lora_ctx);