From 34b3dac66d66ccef94d090ed6920408ea554a053 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:00:44 +0800 Subject: [PATCH] Fixes "Not enough space in the context's memory pool" encountered on certain models, which seems to be caused by some imprecision related to the automatic casting of floating point values (cherry picked from commit 1ad8f0d80eebdb56bac4e76100975a8fc14b1d62) --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index fc38e18f6..1ccd8465e 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1565,7 +1565,7 @@ static bool llama_kv_cache_init( cache.cells.clear(); cache.cells.resize(n_ctx); - cache.buf.resize(n_elements*(ggml_type_sizef(ktype) + ggml_type_sizef(vtype)) + 2u*n_layer*ggml_tensor_overhead()); + cache.buf.resize(n_elements*((size_t)(ggml_type_sizef(ktype) + ggml_type_sizef(vtype))) + 2u*n_layer*ggml_tensor_overhead()); memset(cache.buf.data, 0, cache.buf.size); struct ggml_init_params params;