diff --git a/common/common.cpp b/common/common.cpp index abe752352..faafc6ce5 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -659,7 +659,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { params.mul_mat_q = false; #else fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS or SYCL. Disabling mul_mat_q kernels has no effect.\n"); -#endif // GGML_USE_CUBLAS +#endif // GGML_USE_CUBLAS || GGML_USE_SYCL } else if (arg == "--no-mmap") { params.use_mmap = false; } else if (arg == "--numa") { @@ -1021,6 +1021,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n"); printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n"); printf(" or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu); +#endif // LLAMA_SUPPORTS_GPU_OFFLOAD #ifdef GGML_USE_CLBLAS printf(" -nommq, --no-mul-mat-q\n"); printf(" use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.\n");