max nodes 8192

This commit is contained in:
Concedo 2023-11-18 11:02:35 +08:00
parent a3f708afce
commit 45ad1b97f8

View file

@@ -3457,7 +3457,7 @@ struct llama_v3_context * llama_v3_new_context_with_model(
#ifdef LLAMA_V3_USE_ALLOCATOR
{
static const size_t tensor_alignment = 32;
-static const size_t GGML_MAX_NODES = 4096;
+static const size_t GGML_MAX_NODES = 8192;
// the compute buffer is used to store the tensor and graph structs, while the allocator buffer is used for the tensor data
ctx->buf_compute.resize(ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead());