diff --git a/common/common.cpp b/common/common.cpp
index 65103c3c2..022bfe287 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1002,9 +1002,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             return true;
         }
         params.main_gpu = std::stoi(argv[i]);
-#ifndef GGML_USE_CUDA_SYCL
-        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the main GPU has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL
+#ifndef GGML_USE_CUDA_SYCL_VULKAN
+        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the main GPU has no effect.\n");
+#endif // GGML_USE_CUDA_SYCL_VULKAN
         return true;
     }
     if (arg == "--split-mode" || arg == "-sm") {
@@ -1030,9 +1030,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             invalid_param = true;
             return true;
         }
-#ifndef GGML_USE_CUDA_SYCL
-        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the split mode has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL
+#ifndef GGML_USE_CUDA_SYCL_VULKAN
+        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the split mode has no effect.\n");
+#endif // GGML_USE_CUDA_SYCL_VULKAN
         return true;
     }
     if (arg == "--tensor-split" || arg == "-ts") {
diff --git a/llama.cpp b/llama.cpp
index e7412de4b..cf7fe1f90 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -16353,7 +16353,7 @@ struct llama_context * llama_new_context_with_model(
             return nullptr;
         }
         if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
-            ggml_backend_t backend = ggml_backend_vk_init(0);
+            ggml_backend_t backend = ggml_backend_vk_init(model->main_gpu);
             if (backend == nullptr) {
                 LLAMA_LOG_ERROR("%s: failed to initialize Vulkan backend\n", __func__);
                 llama_free(ctx);
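
Note: the #ifndef guards above test GGML_USE_CUDA_SYCL_VULKAN, whose definition is not included in this patch. A minimal sketch of how such a helper macro could be defined near the top of common/common.cpp, mirroring the existing GGML_USE_CUDA_SYCL convenience macro (an assumption; the defining hunk is not part of the diff above):

    // Assumed helper, not shown in the patch: folds the three GPU backends
    // into one macro so the warnings in gpt_params_find_arg can test a
    // single condition instead of repeating the defined() checks inline.
    #if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL) || defined(GGML_USE_VULKAN)
    #define GGML_USE_CUDA_SYCL_VULKAN
    #endif

With ggml_backend_vk_init(model->main_gpu) replacing the hard-coded device 0, the index passed via --main-gpu/-mg should select which Vulkan device is used when --split-mode none is in effect, e.g. (illustrative invocation; model path and layer count are placeholders):

    ./main -m model.gguf -ngl 99 -sm none -mg 1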