prevent lora with clblast
commit c9983a72d6
parent 30d1017021
2 changed files with 6 additions and 4 deletions
@@ -8854,11 +8854,12 @@ static int llama_apply_lora_from_file_internal(
         offload_func_t offload_func = llama_nop;
         offload_func_t offload_func_force_inplace = llama_nop;

-#ifdef GGML_USE_CUBLAS
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
         if (dest_t->backend == GGML_BACKEND_GPU || dest_t->backend == GGML_BACKEND_GPU_SPLIT) {
             if (dest_t->type != GGML_TYPE_F16) {
+                printf("\nError: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models\n");
                 throw std::runtime_error(format(
-                    "%s: error: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models", __func__));
+                    "%s: error: lora failed", __func__));
             }
             offload_func = ggml_cuda_assign_buffers;
             offload_func_force_inplace = ggml_cuda_assign_buffers_force_inplace;
@@ -3763,11 +3763,12 @@ int llama_v3_apply_lora_from_file_internal(const struct llama_v3_model & model,
         offload_func_t offload_func = llama_v3_nop;
         offload_func_t offload_func_force_inplace = llama_v3_nop;

-#ifdef GGML_USE_CUBLAS
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
         if (dest_t->backend == GGML_BACKEND_GPU || dest_t->backend == GGML_BACKEND_GPU_SPLIT) {
             if (dest_t->type != GGML_TYPE_F16) {
+                printf("\nError: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models\n");
                 throw std::runtime_error(format_old(
-                    "%s: error: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models", __func__));
+                    "%s: error: lora failed", __func__));
             }
             offload_func = ggml_cuda_assign_buffers;
             offload_func_force_inplace = ggml_cuda_assign_buffers_force_inplace;
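Both hunks make the same change. Below is a minimal sketch, assembled from the added and context lines above, of how the guard reads once the patch is applied; the enclosing loop over the LoRA tensors, the closing brace, and the matching #endif are assumed and are not part of this diff.

#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
    if (dest_t->backend == GGML_BACKEND_GPU || dest_t->backend == GGML_BACKEND_GPU_SPLIT) {
        if (dest_t->type != GGML_TYPE_F16) {
            // CLBlast builds now take the same path as cuBLAS builds:
            // print a user-readable message, then abort the LoRA load.
            printf("\nError: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models\n");
            throw std::runtime_error(format(
                "%s: error: lora failed", __func__));
        }
        offload_func = ggml_cuda_assign_buffers;
        offload_func_force_inplace = ggml_cuda_assign_buffers_force_inplace;
    }          // assumed: closes the backend check shown above
#endif         // assumed: closes the GGML_USE_CUBLAS / GGML_USE_CLBLAST block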