From 5c6eed39ee852c0656ed1830b61e70db4b9d6bdf Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Mon, 3 Jul 2023 21:31:34 -0500 Subject: [PATCH] llama: increase MEM_REQ_EVAL for MODEL_3B It avoids crashing for quantized weights on CPU. A more principled way to calculate the required buffer size is still needed. --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 935130b63..f8f7c4bca 100644 --- a/llama.cpp +++ b/llama.cpp @@ -122,7 +122,7 @@ static const std::map & MEM_REQ_KV_SELF() static const std::map & MEM_REQ_EVAL() { static std::map k_sizes = { - { MODEL_3B, 512ull * MB }, + { MODEL_3B, 640ull * MB }, { MODEL_7B, 768ull * MB }, { MODEL_13B, 1024ull * MB }, { MODEL_30B, 1280ull * MB },