ggml : fix AVX build + update to new Q8_0 format

This commit is contained in:
Georgi Gerganov 2023-04-22 11:08:12 +03:00
parent 955ef9a5d5
commit 872c365a91
3 changed files with 20 additions and 8 deletions

View file

@@ -68,7 +68,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
{ MODEL_65B, 512ull * MB },
};
return _MEM_REQ_SCRATCH1;
-};
+}
// 2*n_embd*n_ctx*n_layer*sizeof(float16)
static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
@@ -80,7 +80,7 @@ static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
{ MODEL_65B, 5120ull * MB },
};
return _MEM_REQ_KV_SELF;
-};
+}
// this is mostly needed for temporary mul_mat buffers to dequantize the data
// not actually needed if BLAS is disabled
@@ -93,7 +93,7 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
{ MODEL_65B, 1536ull * MB },
};
return _MEM_REQ_EVAL;
-};
+}
// default hparams (LLaMA 7B)
struct llama_hparams {