Increase scratch-buffer multiplier for context sizes above 4096

This commit is contained in:
Concedo 2023-07-02 14:55:08 +08:00
parent b85ea580d3
commit e19483ca0f

View file

@ -1127,7 +1127,7 @@ static void llama_model_load_internal(
     const size_t scale = memory_type == GGML_TYPE_F32 ? 2 : 1;
     // this is the total memory required to run the inference
-    const size_t bigctxmul = (hparams.n_ctx>2048?2:1);
+    const size_t bigctxmul = (hparams.n_ctx>4096?3:(hparams.n_ctx>2048?2:1));
     const size_t mem_required =
         ctx_size +
         mmapped_size - vram_weights + // weights in VRAM not in memory