llama: increase MEM_REQ_EVAL for MODEL_3B
This avoids crashes with quantized weights on CPU. A more principled way to calculate the required buffer size would be preferable.
This commit is contained in:
parent
41819b0bd7
commit
5c6eed39ee
1 changed file with 1 addition and 1 deletion
|
@ -122,7 +122,7 @@ static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
|
|||
static const std::map<e_model, size_t> & MEM_REQ_EVAL()
|
||||
{
|
||||
static std::map<e_model, size_t> k_sizes = {
|
||||
{ MODEL_3B, 512ull * MB },
|
||||
{ MODEL_3B, 640ull * MB },
|
||||
{ MODEL_7B, 768ull * MB },
|
||||
{ MODEL_13B, 1024ull * MB },
|
||||
{ MODEL_30B, 1280ull * MB },
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue