CUDA GPU acceleration for LoRAs + f16 models (#1970)
This commit is contained in:
parent
cfa0750bc9
commit
7f9753fa12
4 changed files with 78 additions and 19 deletions
|
@@ -416,13 +416,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef GGML_USE_CUBLAS
|
||||
if (!params.lora_adapter.empty() && params.n_gpu_layers > 0) {
|
||||
fprintf(stderr, "%s: error: the simultaneous use of LoRAs and GPU acceleration is not supported", __func__);
|
||||
exit(1);
|
||||
}
|
||||
#endif // GGML_USE_CUBLAS
|
||||
|
||||
if (escape_prompt) {
|
||||
process_escapes(params.prompt);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue