From 4713a40c118b24cbf6a3857c3d9f002a7bb0de67 Mon Sep 17 00:00:00 2001
From: Galunid
Date: Tue, 7 Nov 2023 22:26:31 +0100
Subject: [PATCH] LLAMA_BACKEND_OFFLOAD* -> llama_backend_offload*

---
 llama.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 8ab307555..100c7e6fc 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3130,12 +3130,12 @@ static void llm_load_tensors(
             // norm is not performance relevant on its own but keeping it in VRAM reduces data copying
             // on Windows however this is detrimental unless everything is on the GPU
 #ifndef _WIN32
-            backend_norm = LLAMA_BACKEND_OFFLOAD;
+            backend_norm = llama_backend_offload;
 #else
-            backend_norm = n_gpu_layers <= (int) n_layer + 2 ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD;
+            backend_norm = n_gpu_layers <= (int) n_layer + 2 ? GGML_BACKEND_CPU : llama_backend_offload;
 #endif // _WIN32

-            backend_output = LLAMA_BACKEND_OFFLOAD_SPLIT;
+            backend_output = llama_backend_offload_split;
         } else {
             backend_norm   = GGML_BACKEND_CPU;
             backend_output = GGML_BACKEND_CPU;
@@ -3163,8 +3163,8 @@ static void llm_load_tensors(
             /* llama_model_loader: - tensor 4: blk.0.attn_output.weight f16 [ 2560, 2560, 1, 1 ] */
-            const ggml_backend_type backend       = int(i) < i_gpu_start ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD; // NOLINT
-            const ggml_backend_type backend_split = int(i) < i_gpu_start ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD_SPLIT; // NOLINT
+            const ggml_backend_type backend       = int(i) < i_gpu_start ? GGML_BACKEND_CPU : llama_backend_offload; // NOLINT
+            const ggml_backend_type backend_split = int(i) < i_gpu_start ? GGML_BACKEND_CPU : llama_backend_offload_split; // NOLINT

             auto & layer = model.layers[i];
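
For context, a minimal sketch of what the renamed identifiers could look like once they are ordinary runtime values instead of the former LLAMA_BACKEND_OFFLOAD* preprocessor macros. The initialization shown here (a helper gated on GGML_USE_CUBLAS) is an assumption for illustration and not necessarily how this patch defines them; only the enum values come from ggml.h.

// Sketch only: lowercase identifiers as runtime-selected ggml backend types.
// The init helper and its CUBLAS gating are assumptions, not taken from the patch.
#include "ggml.h"

static enum ggml_backend_type llama_backend_offload       = GGML_BACKEND_CPU;
static enum ggml_backend_type llama_backend_offload_split = GGML_BACKEND_CPU;

static void llama_init_backend_offload(void) {
#if defined(GGML_USE_CUBLAS)
    // With CUDA built in, offloaded tensors go to the GPU and "split" tensors
    // may be distributed across devices.
    llama_backend_offload       = GGML_BACKEND_GPU;
    llama_backend_offload_split = GGML_BACKEND_GPU_SPLIT;
#endif
}

Selecting the backend at runtime rather than via macros lets a single binary fall back to GGML_BACKEND_CPU when no GPU backend is usable, which is consistent with the call sites above that compare against n_gpu_layers and i_gpu_start.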