From c9983a72d6ee1924153e5f983ea27983dd091100 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Wed, 25 Oct 2023 15:18:03 +0800
Subject: [PATCH] prevent lora with clblast

---
 llama.cpp              | 5 +++--
 otherarch/llama_v3.cpp | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 249028ddf..76b47ccdc 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8854,11 +8854,12 @@ static int llama_apply_lora_from_file_internal(
         offload_func_t offload_func = llama_nop;
         offload_func_t offload_func_force_inplace = llama_nop;
 
-#ifdef GGML_USE_CUBLAS
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
         if (dest_t->backend == GGML_BACKEND_GPU || dest_t->backend == GGML_BACKEND_GPU_SPLIT) {
             if (dest_t->type != GGML_TYPE_F16) {
+                printf("\nError: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models\n");
                 throw std::runtime_error(format(
-                    "%s: error: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models", __func__));
+                    "%s: error: lora failed", __func__));
             }
             offload_func = ggml_cuda_assign_buffers;
             offload_func_force_inplace = ggml_cuda_assign_buffers_force_inplace;
diff --git a/otherarch/llama_v3.cpp b/otherarch/llama_v3.cpp
index 703350a80..7eb3485f7 100644
--- a/otherarch/llama_v3.cpp
+++ b/otherarch/llama_v3.cpp
@@ -3763,11 +3763,12 @@ int llama_v3_apply_lora_from_file_internal(const struct llama_v3_model & model,
         offload_func_t offload_func = llama_v3_nop;
         offload_func_t offload_func_force_inplace = llama_v3_nop;
 
-#ifdef GGML_USE_CUBLAS
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
         if (dest_t->backend == GGML_BACKEND_GPU || dest_t->backend == GGML_BACKEND_GPU_SPLIT) {
             if (dest_t->type != GGML_TYPE_F16) {
+                printf("\nError: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models\n");
                 throw std::runtime_error(format_old(
-                    "%s: error: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models", __func__));
+                    "%s: error: lora failed", __func__));
             }
             offload_func = ggml_cuda_assign_buffers;
             offload_func_force_inplace = ggml_cuda_assign_buffers_force_inplace;