diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a7197282..a54fac0bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,6 +68,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
 
 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
+option(LLAMA_NVAPI "llama: use NvAPI to control performance states on NVIDIA GPUs" ON)
 
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b22f3cde0..13d1a6f3a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -12,6 +12,7 @@ endif()
 
 #
 # nvapi
+
 if (LLAMA_NVAPI)
     add_library(nvapi nvapi.cpp nvapi.h)
 
diff --git a/src/llama.cpp b/src/llama.cpp
index e82d2fb42..ff4f6aa90 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1,7 +1,6 @@
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 
-#include "nvapi.h"
 #include "unicode.h"
 
 #include "ggml.h"
@@ -30,6 +29,10 @@
 # include "ggml-metal.h"
 #endif
 
+#ifdef LLAMA_NVAPI
+# include "nvapi.h"
+#endif
+
 // TODO: replace with ggml API call
 #define QK_K 256
 
@@ -17081,7 +17084,7 @@ void llama_backend_init(void) {
         ggml_free(ctx);
     }
 
-#if defined(GGML_USE_CUDA) && defined(LLAMA_NVAPI)
+#ifdef LLAMA_NVAPI
     // initalize NvAPI library
     nvapi_init();
 #endif
@@ -17096,7 +17099,7 @@ void llama_numa_init(enum ggml_numa_strategy numa) {
 void llama_backend_free(void) {
     ggml_quantize_free();
 
-#if defined(GGML_USE_CUDA) && defined(LLAMA_NVAPI)
+#ifdef LLAMA_NVAPI
     // free NvAPI library
     nvapi_free();
 #endif
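
For reference, the calls guarded by LLAMA_NVAPI above rely on the nvapi.cpp/nvapi.h pair built in src/CMakeLists.txt, which is not shown in this diff. Below is a minimal sketch of what that wrapper's interface could look like: the names nvapi_init and nvapi_free come from the diff, while the signatures and the described behaviour are assumptions based on the option's description ("use NvAPI to control performance states on NVIDIA GPUs"), not the PR's actual implementation.

    // nvapi.h -- hypothetical sketch; the real header in this PR may differ.
    #pragma once

    // Load NvAPI and request a high performance state (P-state) on NVIDIA GPUs.
    // Presumably a no-op when no NVIDIA driver or GPU is present.
    void nvapi_init(void);

    // Restore the default performance-state policy and unload NvAPI.
    void nvapi_free(void);

With the option left ON (the default), LLAMA_NVAPI is presumably passed as a compile definition so the #ifdef blocks in llama.cpp are built; configuring with -DLLAMA_NVAPI=OFF compiles them out.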