Added support for 70B LLaMA 2

This commit is contained in:
Concedo 2023-07-24 15:20:18 +08:00
parent 993ba3b026
commit 94499dba25

View file

@ -1045,6 +1045,11 @@ static void llama_model_load_internal(
// LLaMAv2
// TODO: temporary until GGUF
//patch for llama2 gqa
if (model.type == e_model::MODEL_65B && hparams.n_mult >= 4096) {
fprintf(stderr, "%s: Applying KCPP Patch for 70B model, setting GQA to 8\n", __func__);
n_gqa = 8;
}
LLAMA_ASSERT(hparams.n_head % n_gqa == 0);
hparams.n_head_kv = hparams.n_head / n_gqa;
if (model.type == e_model::MODEL_65B && n_gqa == 8) {