added support for 70b llama 2
This commit is contained in:
parent
993ba3b026
commit
94499dba25
1 changed file with 5 additions and 0 deletions
|
@@ -1045,6 +1045,11 @@ static void llama_model_load_internal(
|
||||||
|
|
||||||
// LLaMAv2
|
// LLaMAv2
|
||||||
// TODO: temporary until GGUF
|
// TODO: temporary until GGUF
|
||||||
|
//patch for llama2 gqa
|
||||||
|
if (model.type == e_model::MODEL_65B && hparams.n_mult >= 4096) {
|
||||||
|
fprintf(stderr, "%s: Applying KCPP Patch for 70B model, setting GQA to 8\n", __func__);
|
||||||
|
n_gqa = 8;
|
||||||
|
}
|
||||||
LLAMA_ASSERT(hparams.n_head % n_gqa == 0);
|
LLAMA_ASSERT(hparams.n_head % n_gqa == 0);
|
||||||
hparams.n_head_kv = hparams.n_head / n_gqa;
|
hparams.n_head_kv = hparams.n_head / n_gqa;
|
||||||
if (model.type == e_model::MODEL_65B && n_gqa == 8) {
|
if (model.type == e_model::MODEL_65B && n_gqa == 8) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue