diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a7197282..a54fac0bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,6 +68,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
 
 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
+option(LLAMA_NVAPI "llama: use NvAPI to control performance states on NVIDIA GPUs" ON)
 
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b22f3cde0..13d1a6f3a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -12,6 +12,7 @@ endif()
 
 #
 # nvapi
+
 if (LLAMA_NVAPI)
     add_library(nvapi nvapi.cpp nvapi.h)
 
diff --git a/src/llama.cpp b/src/llama.cpp
index e82d2fb42..ff4f6aa90 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1,7 +1,6 @@
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 
-#include "nvapi.h"
 #include "unicode.h"
 
 #include "ggml.h"
@@ -30,6 +29,10 @@
 # include "ggml-metal.h"
 #endif
 
+#ifdef LLAMA_NVAPI
+# include "nvapi.h"
+#endif
+
 // TODO: replace with ggml API call
 #define QK_K 256
 
@@ -17081,7 +17084,7 @@ void llama_backend_init(void) {
         ggml_free(ctx);
     }
 
-#if defined(GGML_USE_CUDA) && defined(LLAMA_NVAPI)
+#ifdef LLAMA_NVAPI
     // initalize NvAPI library
     nvapi_init();
 #endif
@@ -17096,7 +17099,7 @@ void llama_numa_init(enum ggml_numa_strategy numa) {
 void llama_backend_free(void) {
     ggml_quantize_free();
 
-#if defined(GGML_USE_CUDA) && defined(LLAMA_NVAPI)
+#ifdef LLAMA_NVAPI
     // free NvAPI library
     nvapi_free();
 #endif
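
For reference, the calls guarded by LLAMA_NVAPI above rely on the nvapi.cpp/nvapi.h pair built in src/CMakeLists.txt, which is not shown in this diff. Below is a minimal sketch of what that wrapper's interface could look like: the names nvapi_init and nvapi_free come from the diff, while the signatures and the described behaviour are assumptions based on the option's description ("use NvAPI to control performance states on NVIDIA GPUs"), not the PR's actual implementation.

    // nvapi.h -- hypothetical sketch; the real header in this PR may differ.
    #pragma once

    // Load NvAPI and request a high performance state (P-state) on NVIDIA GPUs.
    // Presumably a no-op when no NVIDIA driver or GPU is present.
    void nvapi_init(void);

    // Restore the default performance-state policy and unload NvAPI.
    void nvapi_free(void);

With the option left ON (the default), LLAMA_NVAPI is presumably passed as a compile definition so the #ifdef blocks in llama.cpp are built; configuring with -DLLAMA_NVAPI=OFF compiles them out.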