improve detection for 70B.

This commit is contained in:
Concedo 2023-08-07 10:43:06 +08:00
parent fb44d72a78
commit 0e41b94f40

View file

@@ -1076,7 +1076,7 @@ static void llama_model_load_internal(
// LLaMAv2 // LLaMAv2
// TODO: temporary until GGUF // TODO: temporary until GGUF
//patch for llama2 gqa //patch for llama2 gqa
if (model.type == e_model::MODEL_65B && hparams.n_mult >= 4096) { if (model.type == e_model::MODEL_65B && hparams.n_mult == 4096) {
fprintf(stderr, "%s: Applying KCPP Patch for 70B model, setting GQA to 8\n", __func__); fprintf(stderr, "%s: Applying KCPP Patch for 70B model, setting GQA to 8\n", __func__);
n_gqa = 8; n_gqa = 8;
} }