Adjusted look ahead in ggml_cuda_pool_malloc to 5%
This seems to be sufficient. With the smaller look-ahead we end up using about 200 MB less VRAM when running the 13B model with a context size of 8192.
This commit is contained in:
parent
1cdbbbb37c
commit
b068f2f4b5
1 changed file with 1 addition and 1 deletion
|
@@ -2466,7 +2466,7 @@ static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
|
||||||
(uint32_t)(max_size/1024/1024), (uint32_t)(tot_size/1024/1024), (uint32_t)(size/1024/1024));
|
(uint32_t)(max_size/1024/1024), (uint32_t)(tot_size/1024/1024), (uint32_t)(size/1024/1024));
|
||||||
#endif
|
#endif
|
||||||
void * ptr;
|
void * ptr;
|
||||||
size_t look_ahead_size = (size_t) (1.25 * size);
|
size_t look_ahead_size = (size_t) (1.05 * size);
|
||||||
look_ahead_size = 256 * ((look_ahead_size + 255)/256);
|
look_ahead_size = 256 * ((look_ahead_size + 255)/256);
|
||||||
CUDA_CHECK(cudaMalloc((void **) &ptr, look_ahead_size));
|
CUDA_CHECK(cudaMalloc((void **) &ptr, look_ahead_size));
|
||||||
*actual_size = look_ahead_size;
|
*actual_size = look_ahead_size;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue