From 07a1b052e5cc3420bb712840b186d90436abd43d Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 10 Jan 2024 10:15:36 +0200
Subject: [PATCH] llama : on Metal, by default offload the full model

ggml-ci
---
 llama.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index ceb70025d..b2e2ca298 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -9069,7 +9069,8 @@ struct llama_model_params llama_model_default_params() {
     };
 
 #ifdef GGML_USE_METAL
-    result.n_gpu_layers = 1;
+    // note: we usually have plenty of VRAM, so by default offload all layers to the GPU
+    result.n_gpu_layers = 999;
 #endif
 
     return result;