diff --git a/CMakeLists.txt b/CMakeLists.txt index a54fac0bf..9d1c3b7ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,7 +68,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE}) # 3rd party libs option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) -option(LLAMA_NVAPI "llama: use NvAPI to control performance states on NVIDIA GPUs" ON) +option(LLAMA_NVAPI "llama: use NvAPI to control performance states on NVIDIA GPUs" OFF) # Required for relocatable CMake package include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) @@ -102,6 +102,11 @@ llama_option_depr(WARNING LLAMA_RPC GGML_RPC) llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL) llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16) +# enable NVAPI for CUDA builds +if (GGML_CUDA) + set(LLAMA_NVAPI ON) +endif() + # # build the library # diff --git a/src/llama.cpp b/src/llama.cpp index ff4f6aa90..e116b7498 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -17085,7 +17085,7 @@ void llama_backend_init(void) { } #ifdef LLAMA_NVAPI - // initalize NvAPI library + // initalize NVAPI library nvapi_init(); #endif } @@ -17100,7 +17100,7 @@ void llama_backend_free(void) { ggml_quantize_free(); #ifdef LLAMA_NVAPI - // free NvAPI library + // free NVAPI library nvapi_free(); #endif } diff --git a/src/nvapi.cpp b/src/nvapi.cpp index e0ec359a3..45bc77f36 100644 --- a/src/nvapi.cpp +++ b/src/nvapi.cpp @@ -6,18 +6,25 @@ # include #endif +#include +#include +#include +#include + ///// -static void * lib; - -static bool load_success; - typedef void * (*nvapi_QueryInterface_t)(int); typedef int (*NvAPI_EnumPhysicalGPUs_t)(void *, void *); typedef int (*NvAPI_GPU_SetForcePstate_t)(void *, int, int); typedef int (*NvAPI_Initialize_t)(); typedef int (*NvAPI_Unload_t)(); +///// + +static bool load_success; + +static void * lib; + static nvapi_QueryInterface_t nvapi_QueryInterface; static NvAPI_EnumPhysicalGPUs_t NvAPI_EnumPhysicalGPUs; static NvAPI_GPU_SetForcePstate_t NvAPI_GPU_SetForcePstate; @@ -43,7 +50,7 @@ static std::set parse_visible_devices() { std::string item; // iterate over the comma-separated device IDs in the environment variable - while (std::getline(ss, item, ",")) { + while (std::getline(ss, item, ',')) { try { // convert the current item to an integer and insert it into the set devices.insert(std::stoi(item)); @@ -97,8 +104,10 @@ void nvapi_init() { } // initialize the NVAPI library - if (NvAPI_Initialize()) { - load_success = true; + if (NvAPI_Initialize) { + if (NvAPI_Initialize()) { + load_success = true; + } } } @@ -118,8 +127,13 @@ void nvapi_free() { } // reset the pointers and flags - lib = nullptr; load_success = false; + lib = nullptr; + nvapi_QueryInterface = nullptr; + NvAPI_EnumPhysicalGPUs = nullptr; + NvAPI_GPU_SetForcePstate = nullptr; + NvAPI_Initialize = nullptr; + NvAPI_Unload = nullptr; } void nvapi_set_pstate(int pstate) { @@ -146,13 +160,13 @@ void nvapi_set_pstate(int pstate) { // iterate over each GPU for (int i = 0; i < gpu_count; i++) { // if the set of visible devices is not empty and the current GPU ID is not in this set, skip this iteration - if (!devices.empty() && !devices.find(i)) { + if (!devices.empty() && devices.find(i) == devices.end()) { continue; } // attempt to set the performance state for the current GPU - if (NvAPI_GPU_SetForcePstate(gpu_array[gpu_id], pstate, 2) != 0) { - fprintf(stderr, "Failed to set performance state for gpu #%d\n", gpu_id); + if (NvAPI_GPU_SetForcePstate(gpu_array[i], pstate, 2) != 0) { + fprintf(stderr, "Failed to set performance state for gpu #%d\n", i); } } }