llama : NvAPI performance state change support

2024-06-25 16:31:06 +00:00 · 2024-06-25 16:31:06 +00:00 · 450eafc7b8
commit 450eafc7b8
parent 925c30956d
4 changed files with 137 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1294,6 +1294,8 @@ endif()
 add_library(llama
            llama.cpp
            llama.h
            nvapi.cpp
            nvapi.h
            unicode.h
            unicode.cpp
            unicode-data.cpp
--- a/llama.cpp
+++ b/llama.cpp
@ -1,6 +1,7 @@
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 #include "nvapi.h"
 #include "unicode.h"
 #include "ggml.h"
@ -16490,6 +16491,11 @@ void llama_backend_init(void) {
        struct ggml_context * ctx = ggml_init(params);
        ggml_free(ctx);
    }
 #ifdef GGML_USE_CUDA
    // initialize NvAPI library
    nvapi_init();
 #endif
 }
 void llama_numa_init(enum ggml_numa_strategy numa) {
@ -16500,6 +16506,11 @@ void llama_numa_init(enum ggml_numa_strategy numa) {
 void llama_backend_free(void) {
    ggml_quantize_free();
 #ifdef GGML_USE_CUDA
    // free NvAPI library
    nvapi_free();
 #endif
 }
 int64_t llama_time_us(void) {
--- a/nvapi.cpp
+++ b/nvapi.cpp
@ -0,0 +1,112 @@
 #include "nvapi.h"
 #ifdef _WIN32
  #include <libloaderapi.h>
 #elif __linux__
  #include <dlfcn.h>
 #endif
 /////
 // Handle of the dynamically loaded NvAPI library (result of LoadLibrary /
 // dlopen in nvapi_init()); null while no library is loaded.
 static void* lib;
 // Set to true by nvapi_init() after its NvAPI_Initialize check and reset to
 // false by nvapi_free(); nvapi_set_pstate() returns early while it is false.
 static bool  load_success;
 // Signature of the "nvapi_QueryInterface" symbol exported by the library.
 // nvapi_init() calls it with a numeric id and casts the returned pointer to
 // one of the function types below.
 typedef void* (*nvapi_QueryInterface_t)(int);
 // Signatures of the entry points obtained through nvapi_QueryInterface.
 // NOTE(review): the int results are presumably NvAPI status codes and the
 // void* parameters opaque handles/buffers - confirm against the NVAPI docs.
 typedef int   (*NvAPI_EnumPhysicalGPUs_t)(void*, void*);
 typedef int   (*NvAPI_GPU_SetForcePstate_t)(void*, int, int);
 typedef int   (*NvAPI_Initialize_t)();
 typedef int   (*NvAPI_Unload_t)();
 // Resolved entry points; all start out null and are filled in by nvapi_init().
 static nvapi_QueryInterface_t     nvapi_QueryInterface;
 static NvAPI_EnumPhysicalGPUs_t   NvAPI_EnumPhysicalGPUs;
 static NvAPI_GPU_SetForcePstate_t NvAPI_GPU_SetForcePstate;
 static NvAPI_Initialize_t         NvAPI_Initialize;
 static NvAPI_Unload_t             NvAPI_Unload;
 /////
 // Loads the NvAPI shared library, resolves the entry points used by this
 // file and initializes the library.
 //
 // Each step only runs when the previous one succeeded, so on a system
 // without NvAPI (or on an unsupported platform) nothing is called and
 // load_success is not set.
 void nvapi_init() {
   // load library, trying each known file name until one succeeds
   #ifdef _WIN32
     // explicit ANSI variant: the LoadLibrary macro expands to LoadLibraryW
     // under UNICODE, which does not accept a narrow string literal
     if (!lib) {
       lib = (void*) LoadLibraryA("nvapi64.dll");
     }
     if (!lib) {
       lib = (void*) LoadLibraryA("nvapi.dll");
     }
   #elif __linux__
     if (!lib) {
       lib = dlopen("libnvidia-api.so.1", RTLD_LAZY);
     }
     if (!lib) {
       lib = dlopen("libnvidia-api.so", RTLD_LAZY);
     }
   #endif

   // lookup QueryInterface, the only symbol taken from the library by name
   if (lib && !nvapi_QueryInterface) {
     #ifdef _WIN32
       // lib is stored as void*, GetProcAddress needs it back as HMODULE
       nvapi_QueryInterface = (nvapi_QueryInterface_t) GetProcAddress((HMODULE) lib, "nvapi_QueryInterface");
     #elif __linux__
       nvapi_QueryInterface = (nvapi_QueryInterface_t) dlsym(lib, "nvapi_QueryInterface");
     #endif
   }

   // resolve functions by their numeric interface ids
   if (nvapi_QueryInterface) {
     NvAPI_EnumPhysicalGPUs = (NvAPI_EnumPhysicalGPUs_t) nvapi_QueryInterface(0xe5ac921f);
     NvAPI_GPU_SetForcePstate = (NvAPI_GPU_SetForcePstate_t) nvapi_QueryInterface(0x025bfb10);
     NvAPI_Initialize = (NvAPI_Initialize_t) nvapi_QueryInterface(0x0150e828);
     NvAPI_Unload = (NvAPI_Unload_t) nvapi_QueryInterface(0xd22bdd7e);
   }

   // initialize library
   // - the pointer is null when the library or the entry point is missing,
   //   so it must be checked before the call
   // - NvAPI reports success as 0 (NVAPI_OK); any other value is an error
   if (NvAPI_Initialize && NvAPI_Initialize() == 0) {
     load_success = true;
   }
 }
 void nvapi_free() {
  // deinitialize library
  if (load_success) {
    NvAPI_Unload();
  }
  // free library
  #ifdef _WIN32
    if (lib) {
      FreeLibrary(lib);
    }
  #else
    if (lib) {
      dlclose(lib);
    }
  #endif
  // invalidate pointers
  lib = nullptr;
  load_success = false;
 }
 // Placeholder for changing the performance state of GPUs.
 //
 // ids / ids_size: array of ids and its element count (not read yet).
 // pstate:         requested state value (not read yet).
 //
 // Currently a no-op in every case; the real work is still to be written
 // and will only run once NvAPI has been initialized successfully.
 void nvapi_set_pstate(int ids[], int ids_size, int pstate) {
   // parameters are unused until the TODO below is implemented
   (void) ids;
   (void) ids_size;
   (void) pstate;

   if (load_success) {
     // TODO
   }
 }
 // Requests pstate value 16 with an empty id list.
 void nvapi_set_pstate_high() {
   const int requested_pstate = 16;
   // no ids supplied: null array with a count of zero
   nvapi_set_pstate(nullptr, 0, requested_pstate);
 }
 // Requests pstate value 8 with an empty id list.
 void nvapi_set_pstate_low() {
   const int requested_pstate = 8;
   // no ids supplied: null array with a count of zero
   nvapi_set_pstate(nullptr, 0, requested_pstate);
 }
--- a/nvapi.h
+++ b/nvapi.h
@ -0,0 +1,12 @@
 // Public interface of the NvAPI loader (implemented in nvapi.cpp).
 #pragma once
 // C linkage so the functions keep unmangled names when included from C++
 #ifdef __cplusplus
 extern "C" {
 #endif
 // Loads the NvAPI shared library, resolves its entry points and
 // initializes it.
 void nvapi_init();
 // Deinitializes NvAPI and unloads the shared library.
 void nvapi_free();
 #ifdef __cplusplus
 }
 #endif