LLAMA_BACKEND_OFFLOAD* -> llama_backend_offload*

Author: Galunid
Date:   2023-11-07 22:26:31 +01:00
parent 698c94595e
commit 4713a40c11

@@ -3130,12 +3130,12 @@ static void llm_load_tensors(
                     // norm is not performance relevant on its own but keeping it in VRAM reduces data copying
                     // on Windows however this is detrimental unless everything is on the GPU
 #ifndef _WIN32
-                    backend_norm = LLAMA_BACKEND_OFFLOAD;
+                    backend_norm = llama_backend_offload;
 #else
-                    backend_norm = n_gpu_layers <= (int) n_layer + 2 ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD;
+                    backend_norm = n_gpu_layers <= (int) n_layer + 2 ? GGML_BACKEND_CPU : llama_backend_offload;
 #endif // _WIN32
-                    backend_output = LLAMA_BACKEND_OFFLOAD_SPLIT;
+                    backend_output = llama_backend_offload_split;
                 } else {
                     backend_norm   = GGML_BACKEND_CPU;
                     backend_output = GGML_BACKEND_CPU;
@@ -3163,8 +3163,8 @@ static void llm_load_tensors(
             /*
             llama_model_loader: - tensor 4: blk.0.attn_output.weight f16 [ 2560, 2560, 1, 1 ]
             */
-            const ggml_backend_type backend       = int(i) < i_gpu_start ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD; // NOLINT
-            const ggml_backend_type backend_split = int(i) < i_gpu_start ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD_SPLIT; // NOLINT
+            const ggml_backend_type backend       = int(i) < i_gpu_start ? GGML_BACKEND_CPU : llama_backend_offload; // NOLINT
+            const ggml_backend_type backend_split = int(i) < i_gpu_start ? GGML_BACKEND_CPU : llama_backend_offload_split; // NOLINT
             auto & layer = model.layers[i];
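
Note: the hunks above only show the use sites, not the definitions of the renamed identifiers. The switch from upper-case to lower-case naming suggests the offload targets went from compile-time macros to ordinary runtime variables. A minimal sketch of what that implies is below; the macro bodies, GGML_USE_CUBLAS guard, and the ggml_cublas_loaded() check are assumptions based on the ggml API of that era, not something shown in this diff:

    // Hypothetical sketch of the definitions this rename implies (not part of the shown hunks).
    // Old form -- compile-time macros, fixed once per build:
    //   #define LLAMA_BACKEND_OFFLOAD       GGML_BACKEND_GPU
    //   #define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU_SPLIT

    // New form -- plain variables that default to CPU and are upgraded at runtime:
    enum ggml_backend_type llama_backend_offload       = GGML_BACKEND_CPU;
    enum ggml_backend_type llama_backend_offload_split = GGML_BACKEND_CPU;
#ifdef GGML_USE_CUBLAS
    if (ggml_cublas_loaded()) { // assumed runtime check: only offload if the CUDA backend actually initialized
        llama_backend_offload       = GGML_BACKEND_GPU;
        llama_backend_offload_split = GGML_BACKEND_GPU_SPLIT;
    }
#endif

If that reading is right, the lower-case names also follow the usual C convention (upper-case for macros, lower-case for variables), so the rename is a correctness signal as much as a style fix: a CUBLAS build that fails to initialize CUDA can now fall back to CPU backends instead of unconditionally tagging tensors for the GPU.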