From e19483ca0f8b8f7900b45ddc7bb5e4143f2518f4 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 2 Jul 2023 14:55:08 +0800 Subject: [PATCH] increase scratch buffer multiplier for context lengths above 4096 --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 989a71431..f0cb84139 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1127,7 +1127,7 @@ static void llama_model_load_internal( const size_t scale = memory_type == GGML_TYPE_F32 ? 2 : 1; // this is the total memory required to run the inference - const size_t bigctxmul = (hparams.n_ctx>2048?2:1); + const size_t bigctxmul = (hparams.n_ctx>4096?3:(hparams.n_ctx>2048?2:1)); const size_t mem_required = ctx_size + mmapped_size - vram_weights + // weights in VRAM not in memory