From 1ad8f0d80eebdb56bac4e76100975a8fc14b1d62 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 14 Dec 2023 16:00:44 +0800
Subject: [PATCH 1/2] Fixes "Not enough space in the context's memory pool"
 encountered on certain models, which seems to be caused by some imprecision
 related to the automatic casting of floating point values

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 0e5ab044c..b8b806f5f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1555,7 +1555,7 @@ static bool llama_kv_cache_init(
     cache.cells.clear();
     cache.cells.resize(n_ctx);
 
-    cache.buf.resize(n_elements*(ggml_type_sizef(ktype) + ggml_type_sizef(vtype)) + 2u*n_layer*ggml_tensor_overhead());
+    cache.buf.resize(n_elements*((size_t)(ggml_type_sizef(ktype) + ggml_type_sizef(vtype))) + 2u*n_layer*ggml_tensor_overhead());
     memset(cache.buf.data, 0, cache.buf.size);
 
     struct ggml_init_params params;

From 05f7db4b29693bae138fe14c024bc960b87fa1d2 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 14 Dec 2023 16:43:34 +0800
Subject: [PATCH 2/2] Do not cast to size_t; make ggml_type_sizef return double instead

---
 ggml.c    | 4 ++--
 ggml.h    | 2 +-
 llama.cpp | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ggml.c b/ggml.c
index 29e18a24c..cbdf93bfc 100644
--- a/ggml.c
+++ b/ggml.c
@@ -2011,8 +2011,8 @@ size_t ggml_type_size(enum ggml_type type) {
     return type_traits[type].type_size;
 }
 
-float ggml_type_sizef(enum ggml_type type) {
-    return ((float)(type_traits[type].type_size))/type_traits[type].blck_size;
+double ggml_type_sizef(enum ggml_type type) {
+    return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
 }
 
 const char * ggml_type_name(enum ggml_type type) {
diff --git a/ggml.h b/ggml.h
index 1447646b1..f04c259e3 100644
--- a/ggml.h
+++ b/ggml.h
@@ -643,7 +643,7 @@ extern "C" {
 
     GGML_API int     ggml_blck_size (enum ggml_type type);
     GGML_API size_t  ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
-    GGML_API float   ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
+    GGML_API double   ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as double
 
     GGML_API const char * ggml_type_name(enum ggml_type type);
     GGML_API const char * ggml_op_name  (enum ggml_op   op);
diff --git a/llama.cpp b/llama.cpp
index b8b806f5f..0e5ab044c 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1555,7 +1555,7 @@ static bool llama_kv_cache_init(
     cache.cells.clear();
     cache.cells.resize(n_ctx);
 
-    cache.buf.resize(n_elements*((size_t)(ggml_type_sizef(ktype) + ggml_type_sizef(vtype))) + 2u*n_layer*ggml_tensor_overhead());
+    cache.buf.resize(n_elements*(ggml_type_sizef(ktype) + ggml_type_sizef(vtype)) + 2u*n_layer*ggml_tensor_overhead());
     memset(cache.buf.data, 0, cache.buf.size);
 
     struct ggml_init_params params;