diff --git a/llama.cpp b/llama.cpp
index e797d132f..d46bd31a9 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1076,7 +1076,7 @@ static void llama_model_load_internal(
     // LLaMAv2
     // TODO: temporary until GGUF
     //patch for llama2 gqa
-    if (model.type == e_model::MODEL_65B && hparams.n_mult >= 4096) {
+    if (model.type == e_model::MODEL_65B && hparams.n_mult == 4096) {
         fprintf(stderr, "%s: Applying KCPP Patch for 70B model, setting GQA to 8\n", __func__);
         n_gqa = 8;
     }