llama : on Metal, by default offload the full model

ggml-ci
2024-01-10 10:15:36 +02:00 · 2024-01-10 10:15:36 +02:00 · 07a1b052e5
commit 07a1b052e5
parent 3cb1c1fb4e
1 changed files with 2 additions and 1 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -9069,7 +9069,8 @@ struct llama_model_params llama_model_default_params() {
    };
 #ifdef GGML_USE_METAL
-    result.n_gpu_layers = 1;
+    // note: we usually have plenty of VRAM, so by default offload all layers to the GPU
+    result.n_gpu_layers = 999;
 #endif
    return result;