From b135927ca4a5f935039c3c24ac7d2b56d45865be Mon Sep 17 00:00:00 2001
From: slaren
Date: Thu, 31 Oct 2024 17:49:15 +0100
Subject: [PATCH] llama : fix missing worst case flag during reserve

---
 src/llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index bedacfcb5..a3756ff33 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -19694,7 +19694,7 @@ struct llama_context * llama_new_context_with_model(
         int n_nodes_tg = ggml_graph_n_nodes(gf_tg);
 
         // reserve again with pp graph to avoid ggml-alloc reallocations during inference
-        gf_pp = llama_build_graph(*ctx, ubatch_pp, false);
+        gf_pp = llama_build_graph(*ctx, ubatch_pp, true);
         if (!ggml_backend_sched_reserve(ctx->sched, gf_pp)) {
             LLAMA_LOG_ERROR("%s: failed to allocate compute buffers\n", __func__);
             llama_free(ctx);
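
Note: per the commit subject, the third argument to llama_build_graph is the
worst-case flag. During context creation the scheduler buffers are reserved
against worst-case graphs, so a graph passed to ggml_backend_sched_reserve
should be built with that flag set; passing false here sized the compute
buffers for a smaller graph, which could force ggml-alloc to reallocate
during inference.

Below is a minimal sketch of the reserve sequence as it reads after this
patch, reconstructed from the hunk context. The ubatch setup and variable
declarations outside the hunk are assumptions, simplified from the
surrounding llama.cpp code of that period, not an exact copy of it:

    // build and reserve a worst-case token-generation (tg) graph to
    // measure the number of splits and nodes it needs
    ggml_cgraph * gf_tg = llama_build_graph(*ctx, ubatch_tg, /*worst_case=*/true);
    ggml_backend_sched_reserve(ctx->sched, gf_tg);
    int n_splits_tg = ggml_backend_sched_get_n_splits(ctx->sched);
    int n_nodes_tg  = ggml_graph_n_nodes(gf_tg);

    // reserve again with the prompt-processing (pp) graph so the buffers
    // end up sized for the larger graph; it must also be built with the
    // worst-case flag set, which is the fix in the hunk above
    gf_pp = llama_build_graph(*ctx, ubatch_pp, /*worst_case=*/true);
    if (!ggml_backend_sched_reserve(ctx->sched, gf_pp)) {
        LLAMA_LOG_ERROR("%s: failed to allocate compute buffers\n", __func__);
        llama_free(ctx);
        return nullptr;
    }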