diff --git a/CMakeLists.txt b/CMakeLists.txt
index d68489ea6..703078010 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -872,6 +872,18 @@ if (LLAMA_CPU_HBM)
     target_link_libraries(ggml PUBLIC memkind)
 endif()
 
+# Optional NvAPI helper library, built only when both LLAMA_CUDA and LLAMA_NVAPI are set.
+if (LLAMA_CUDA AND LLAMA_NVAPI)
+    add_library(nvapi nvapi.cpp nvapi.h)
+    # CMAKE_DL_LIBS resolves to the dlopen library ("dl" on Linux) and is empty where
+    # none is needed; unlike the LINUX variable it does not require CMake >= 3.25.
+    target_link_libraries(nvapi PUBLIC ${CMAKE_DL_LIBS})
+    # llama.cpp only calls nvapi_init()/nvapi_free() when LLAMA_NVAPI is defined, so
+    # export the macro to every target that links nvapi.
+    target_compile_definitions(nvapi PUBLIC LLAMA_NVAPI)
+    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} nvapi)
+endif()
+
 function(get_flags CCID CCVER)
     set(C_FLAGS "")
     set(CXX_FLAGS "")
@@ -1294,8 +1306,6 @@ endif()
 add_library(llama
             llama.cpp
             llama.h
-            nvapi.cpp
-            nvapi.h
             unicode.h
             unicode.cpp
             unicode-data.cpp
diff --git a/Makefile b/Makefile
index 2b841ab40..f6e8eb73e 100644
--- a/Makefile
+++ b/Makefile
@@ -698,7 +698,7 @@ ggml-metal-embed.o: ggml-metal.metal ggml-common.h
 endif
 endif # LLAMA_METAL
 
-OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o nvapi.o unicode.o unicode-data.o
+OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
 COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
 COMMON_DEPS   = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
 
@@ -794,16 +794,13 @@ ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
 ggml-blas.o: ggml-blas.cpp ggml-blas.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-nvapi.o: nvapi.cpp nvapi.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
 unicode.o: unicode.cpp unicode.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 unicode-data.o: unicode-data.cpp unicode-data.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-llama.o: llama.cpp nvapi.h unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
+llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 common.o: common/common.cpp $(COMMON_H_DEPS)
diff --git a/llama.cpp b/llama.cpp
index ca74e74f9..b6481259a 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -16492,7 +16492,7 @@ void llama_backend_init(void) {
         ggml_free(ctx);
     }
 
-#ifdef GGML_USE_CUDA
+#if defined(GGML_USE_CUDA) && defined(LLAMA_NVAPI)
-    // initalize NvAPI library
+    // initialize NvAPI library
     nvapi_init();
 #endif
@@ -16507,7 +16507,7 @@ void llama_numa_init(enum ggml_numa_strategy numa) {
 void llama_backend_free(void) {
     ggml_quantize_free();
 
-#ifdef GGML_USE_CUDA
+#if defined(GGML_USE_CUDA) && defined(LLAMA_NVAPI)
     // free NvAPI library
     nvapi_free();
 #endif