From 1ad8f0d80eebdb56bac4e76100975a8fc14b1d62 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:00:44 +0800 Subject: [PATCH 1/2] Fixes "Not enough space in the context's memory pool" encountered on certain models, which seems to be caused by some imprecision related to the automatic casting of floating point values --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 0e5ab044c..b8b806f5f 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1555,7 +1555,7 @@ static bool llama_kv_cache_init( cache.cells.clear(); cache.cells.resize(n_ctx); - cache.buf.resize(n_elements*(ggml_type_sizef(ktype) + ggml_type_sizef(vtype)) + 2u*n_layer*ggml_tensor_overhead()); + cache.buf.resize(n_elements*((size_t)(ggml_type_sizef(ktype) + ggml_type_sizef(vtype))) + 2u*n_layer*ggml_tensor_overhead()); memset(cache.buf.data, 0, cache.buf.size); struct ggml_init_params params; From 05f7db4b29693bae138fe14c024bc960b87fa1d2 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:43:34 +0800 Subject: [PATCH 2/2] do not cast to size_t, instead just use doubles --- ggml.c | 4 ++-- ggml.h | 2 +- llama.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml.c b/ggml.c index 29e18a24c..cbdf93bfc 100644 --- a/ggml.c +++ b/ggml.c @@ -2011,8 +2011,8 @@ size_t ggml_type_size(enum ggml_type type) { return type_traits[type].type_size; } -float ggml_type_sizef(enum ggml_type type) { - return ((float)(type_traits[type].type_size))/type_traits[type].blck_size; +double ggml_type_sizef(enum ggml_type type) { + return ((double)(type_traits[type].type_size))/type_traits[type].blck_size; } const char * ggml_type_name(enum ggml_type type) { diff --git a/ggml.h b/ggml.h index 1447646b1..f04c259e3 100644 --- a/ggml.h +++ b/ggml.h @@ -643,7 +643,7 @@ extern "C" { GGML_API int ggml_blck_size (enum ggml_type type); GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block - GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float + GGML_API double ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float GGML_API const char * ggml_type_name(enum ggml_type type); GGML_API const char * ggml_op_name (enum ggml_op op); diff --git a/llama.cpp b/llama.cpp index b8b806f5f..0e5ab044c 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1555,7 +1555,7 @@ static bool llama_kv_cache_init( cache.cells.clear(); cache.cells.resize(n_ctx); - cache.buf.resize(n_elements*((size_t)(ggml_type_sizef(ktype) + ggml_type_sizef(vtype))) + 2u*n_layer*ggml_tensor_overhead()); + cache.buf.resize(n_elements*(ggml_type_sizef(ktype) + ggml_type_sizef(vtype)) + 2u*n_layer*ggml_tensor_overhead()); memset(cache.buf.data, 0, cache.buf.size); struct ggml_init_params params;