Fixes "Not enough space in the context's memory pool" encountered on certain models, which seems to be caused by some imprecision related to the automatic casting of floating point values

(cherry picked from commit 1ad8f0d80e)
2023-12-14 16:00:44 +08:00 · 2023-12-14 16:00:44 +08:00 · 34b3dac66d
commit 34b3dac66d
parent 0e31f53422
1 changed files with 1 additions and 1 deletions
--- a/llama.cpp
+++ b/llama.cpp
@ -1565,7 +1565,7 @@ static bool llama_kv_cache_init(
    cache.cells.clear();
    cache.cells.resize(n_ctx);

-    cache.buf.resize(n_elements*(ggml_type_sizef(ktype) + ggml_type_sizef(vtype)) + 2u*n_layer*ggml_tensor_overhead());
+    cache.buf.resize(n_elements*((size_t)(ggml_type_sizef(ktype) + ggml_type_sizef(vtype))) + 2u*n_layer*ggml_tensor_overhead());
    memset(cache.buf.data, 0, cache.buf.size);

    struct ggml_init_params params;