diff --git a/otherarch/llama_v3.cpp b/otherarch/llama_v3.cpp
index 0f2313aec..03a6438e2 100644
--- a/otherarch/llama_v3.cpp
+++ b/otherarch/llama_v3.cpp
@@ -3457,7 +3457,7 @@ struct llama_v3_context * llama_v3_new_context_with_model(
 #ifdef LLAMA_V3_USE_ALLOCATOR
         {
             static const size_t tensor_alignment = 32;
-            static const size_t GGML_MAX_NODES = 4096;
+            static const size_t GGML_MAX_NODES = 8192;
             // the compute buffer is used to store the tensor and graph structs, while the allocator buffer is used for the tensor data
             ctx->buf_compute.resize(ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead());