diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1acf4bb08..d68489ea6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1294,6 +1294,8 @@ endif()
 add_library(llama
             llama.cpp
             llama.h
+            nvapi.cpp
+            nvapi.h
             unicode.h
             unicode.cpp
             unicode-data.cpp
diff --git a/llama.cpp b/llama.cpp
index 33e6cb722..ca74e74f9 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1,6 +1,7 @@
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 
+#include "nvapi.h"
 #include "unicode.h"
 
 #include "ggml.h"
@@ -16490,6 +16491,11 @@ void llama_backend_init(void) {
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
     }
+
+#ifdef GGML_USE_CUDA
+    // initialize NvAPI library
+    nvapi_init();
+#endif
 }
 
 void llama_numa_init(enum ggml_numa_strategy numa) {
@@ -16500,6 +16506,11 @@ void llama_numa_init(enum ggml_numa_strategy numa) {
 
 void llama_backend_free(void) {
     ggml_quantize_free();
+
+#ifdef GGML_USE_CUDA
+    // unload NvAPI and release its library handle (counterpart of the nvapi_init() call in llama_backend_init)
+    nvapi_free();
+#endif
 }
 
 int64_t llama_time_us(void) {
diff --git a/nvapi.cpp b/nvapi.cpp
new file mode 100644
index 000000000..9c19d4b4a
--- /dev/null
+++ b/nvapi.cpp
@@ -0,0 +1,112 @@
+#include "nvapi.h"
+
+#ifdef _WIN32
+  #include <libloaderapi.h>
+#elif __linux__
+  #include <dlfcn.h>
+#endif
+
+/////
+
+static void* lib;          // handle returned by LoadLibrary / dlopen in nvapi_init(); reset by nvapi_free()
+
+static bool  load_success; // set by nvapi_init(), cleared by nvapi_free(); gates NvAPI_Unload and nvapi_set_pstate
+// function pointer types: nvapi_QueryInterface is looked up by symbol name, the others are obtained through it by numeric id
+typedef void* (*nvapi_QueryInterface_t)(int);
+typedef int   (*NvAPI_EnumPhysicalGPUs_t)(void*, void*);
+typedef int   (*NvAPI_GPU_SetForcePstate_t)(void*, int, int);
+typedef int   (*NvAPI_Initialize_t)();
+typedef int   (*NvAPI_Unload_t)();
+// resolved entry points; null (static storage) until nvapi_init() fills them in
+static nvapi_QueryInterface_t     nvapi_QueryInterface;
+static NvAPI_EnumPhysicalGPUs_t   NvAPI_EnumPhysicalGPUs;
+static NvAPI_GPU_SetForcePstate_t NvAPI_GPU_SetForcePstate;
+static NvAPI_Initialize_t         NvAPI_Initialize;
+static NvAPI_Unload_t             NvAPI_Unload;
+
+/////
+
+// Loads the NvAPI library, resolves the entry points and initializes NvAPI; load_success stays false on any failure.
+void nvapi_init() {
+  if (load_success) {
+    return; // already initialized; keeps NvAPI_Initialize / NvAPI_Unload balanced
+  }
+
+  // load library (preferred name first, then the fallback name)
+  #ifdef _WIN32
+    if (!lib) {
+      lib = (void*) LoadLibraryA("nvapi64.dll"); // ANSI variant: the name is a narrow string even with UNICODE defined
+    }
+    if (!lib) {
+      lib = (void*) LoadLibraryA("nvapi.dll");
+    }
+  #elif __linux__
+    if (!lib) {
+      lib = dlopen("libnvidia-api.so.1", RTLD_LAZY);
+    }
+    if (!lib) {
+      lib = dlopen("libnvidia-api.so", RTLD_LAZY);
+    }
+  #endif
+
+  // lookup QueryInterface, the only symbol resolved by name
+  if (lib && !nvapi_QueryInterface) {
+    #ifdef _WIN32
+      nvapi_QueryInterface = (nvapi_QueryInterface_t) GetProcAddress((HMODULE) lib, "nvapi_QueryInterface");
+    #elif __linux__
+      nvapi_QueryInterface = (nvapi_QueryInterface_t) dlsym(lib, "nvapi_QueryInterface");
+    #endif
+  }
+
+  // resolve functions by interface id
+  if (nvapi_QueryInterface) {
+    NvAPI_EnumPhysicalGPUs = (NvAPI_EnumPhysicalGPUs_t) nvapi_QueryInterface(0xe5ac921f);
+    NvAPI_GPU_SetForcePstate = (NvAPI_GPU_SetForcePstate_t) nvapi_QueryInterface(0x025bfb10);
+    NvAPI_Initialize = (NvAPI_Initialize_t) nvapi_QueryInterface(0x0150e828);
+    NvAPI_Unload = (NvAPI_Unload_t) nvapi_QueryInterface(0xd22bdd7e);
+  }
+
+  // initialize library: skipped when unresolved; NvAPI reports success as status 0 (NVAPI_OK)
+  if (NvAPI_Initialize && NvAPI_Initialize() == 0) {
+    load_success = true;
+  }
+}
+
+// Unloads NvAPI if it was initialized, releases the library handle and resets all module state.
+void nvapi_free() {
+  if (load_success && NvAPI_Unload) {
+    NvAPI_Unload();
+  }
+  if (lib) {
+    #ifdef _WIN32
+      FreeLibrary((HMODULE) lib);
+    #elif __linux__ // same guard as the <dlfcn.h> include, so dlclose is always declared here
+      dlclose(lib);
+    #endif
+  }
+  // invalidate everything that pointed into the released library so a later nvapi_init() starts clean
+  lib = nullptr;
+  load_success = false;
+  nvapi_QueryInterface = nullptr;
+  NvAPI_EnumPhysicalGPUs = nullptr;
+  NvAPI_GPU_SetForcePstate = nullptr;
+  NvAPI_Initialize = nullptr;
+  NvAPI_Unload = nullptr;
+}
+
+// Stub: does nothing yet. The work is gated on load_success, so it is
+// skipped entirely unless nvapi_init() marked NvAPI as loaded.
+void nvapi_set_pstate(int ids[], int ids_size, int pstate) {
+  if (load_success) {
+    // TODO
+  }
+}
+
+void nvapi_set_pstate_high() { // forwards pstate value 16 with no GPU id list
+  nvapi_set_pstate(nullptr, 0, 16);
+}
+
+void nvapi_set_pstate_low() { // forwards pstate value 8 with no GPU id list
+  nvapi_set_pstate(nullptr, 0, 8);
+}
diff --git a/nvapi.h b/nvapi.h
new file mode 100644
index 000000000..d73b983ec
--- /dev/null
+++ b/nvapi.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void nvapi_init(void); // load and initialize NvAPI; (void) gives C callers a real prototype
+void nvapi_free(void); // unload NvAPI and release the library handle
+
+#ifdef __cplusplus
+}
+#endif