From b068f2f4b57e8f41c81d0a5949b58ba1d6bfb219 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Fri, 21 Jul 2023 11:58:52 +0300
Subject: [PATCH] Adjusted look ahead in ggml_cuda_pool_malloc to 5%

This seems to be sufficient. We end up using about 200 MB less VRAM
this way when running the 13B model with a context of 8192.
---
 ggml-cuda.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index e05b9667e..49f9e21e6 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -2466,7 +2466,7 @@ static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
             (uint32_t)(max_size/1024/1024), (uint32_t)(tot_size/1024/1024), (uint32_t)(size/1024/1024));
 #endif
     void * ptr;
-    size_t look_ahead_size = (size_t) (1.25 * size);
+    size_t look_ahead_size = (size_t) (1.05 * size);
     look_ahead_size = 256 * ((look_ahead_size + 255)/256);
     CUDA_CHECK(cudaMalloc((void **) &ptr, look_ahead_size));
     *actual_size = look_ahead_size;
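
Note (illustration only, not part of the patch): the hunk above pads each pool
allocation by a look-ahead factor (1.25 before, 1.05 after) and rounds the
result up to a multiple of 256 bytes. The standalone sketch below reproduces
only that arithmetic under those assumptions, so the effect of the factor
change can be checked directly; the 400 MB request size is a made-up example,
not a value from the repository.

    #include <stdio.h>
    #include <stdlib.h>

    /* Pad the requested size by `factor`, then round up to the next multiple
       of 256 bytes, mirroring the look-ahead sizing in ggml_cuda_pool_malloc. */
    static size_t look_ahead(size_t size, double factor) {
        size_t s = (size_t) (factor * size);
        return 256 * ((s + 255) / 256);
    }

    int main(void) {
        size_t request = (size_t) 400 * 1024 * 1024;  /* hypothetical 400 MB request */
        printf("1.25x look-ahead: %zu MB\n", look_ahead(request, 1.25) / 1024 / 1024);
        printf("1.05x look-ahead: %zu MB\n", look_ahead(request, 1.05) / 1024 / 1024);
        return 0;
    }

For this example the 1.25 factor reserves 500 MB while 1.05 reserves 420 MB;
summed over the handful of large buffers the pool holds at context 8192, the
smaller padding accounts for savings on the order of the ~200 MB reported above.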