From 45ad1b97f89ae64f14977c4b054e9e8aff8b145d Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 18 Nov 2023 11:02:35 +0800
Subject: [PATCH] max nodes 8192

---
 otherarch/llama_v3.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/otherarch/llama_v3.cpp b/otherarch/llama_v3.cpp
index 0f2313aec..03a6438e2 100644
--- a/otherarch/llama_v3.cpp
+++ b/otherarch/llama_v3.cpp
@@ -3457,7 +3457,7 @@ struct llama_v3_context * llama_v3_new_context_with_model(
 #ifdef LLAMA_V3_USE_ALLOCATOR
     {
         static const size_t tensor_alignment = 32;
-        static const size_t GGML_MAX_NODES = 4096;
+        static const size_t GGML_MAX_NODES = 8192;
 
         // the compute buffer is used to store the tensor and graph structs, while the allocator buffer is used for the tensor data
         ctx->buf_compute.resize(ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead());
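
Note (not part of the patch itself): the resize() call above sizes the compute buffer, which holds only tensor and graph metadata, so raising GGML_MAX_NODES from 4096 to 8192 roughly doubles that metadata budget. Below is a minimal standalone sketch of the same arithmetic, assuming ggml.h is on the include path; ggml_tensor_overhead() and ggml_graph_overhead() are the real ggml helpers used in the hunk, everything else (main, the printout) is illustrative only.

// sizing_sketch.cpp -- rough compute-buffer sizing, mirroring the resize() call in the patch
#include <cstdio>
#include "ggml.h"

int main() {
    static const size_t GGML_MAX_NODES = 8192; // raised from 4096 by this patch

    // Metadata only: per-tensor struct overhead times the node cap, plus the
    // graph struct overhead. Actual tensor data lives in the allocator buffer,
    // as the comment in the patched code notes.
    const size_t buf_compute_size =
        ggml_tensor_overhead() * GGML_MAX_NODES + ggml_graph_overhead();

    std::printf("compute buffer: %zu bytes for %zu nodes\n",
                buf_compute_size, GGML_MAX_NODES);
    return 0;
}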