minor fixes

This commit is contained in:
sasha0552 2024-06-27 08:54:02 +00:00 committed by GitHub
parent 7cdad3a693
commit 742597e31a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 33 additions and 14 deletions

View file

@ -68,7 +68,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
# 3rd party libs # 3rd party libs
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
option(LLAMA_NVAPI "llama: use NvAPI to control performance states on NVIDIA GPUs" ON) option(LLAMA_NVAPI "llama: use NvAPI to control performance states on NVIDIA GPUs" OFF)
# Required for relocatable CMake package # Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
@ -102,6 +102,11 @@ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL) llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16) llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
# enable NVAPI for CUDA builds
if (GGML_CUDA)
set(LLAMA_NVAPI ON)
endif()
# #
# build the library # build the library
# #

View file

@ -17085,7 +17085,7 @@ void llama_backend_init(void) {
} }
#ifdef LLAMA_NVAPI #ifdef LLAMA_NVAPI
// initialize NvAPI library // initialize NVAPI library
nvapi_init(); nvapi_init();
#endif #endif
} }
@ -17100,7 +17100,7 @@ void llama_backend_free(void) {
ggml_quantize_free(); ggml_quantize_free();
#ifdef LLAMA_NVAPI #ifdef LLAMA_NVAPI
// free NvAPI library // free NVAPI library
nvapi_free(); nvapi_free();
#endif #endif
} }

View file

@ -6,18 +6,25 @@
# include <dlfcn.h> # include <dlfcn.h>
#endif #endif
#include <cstdio>
#include <set>
#include <sstream>
#include <string>
///// /////
static void * lib;
static bool load_success;
typedef void * (*nvapi_QueryInterface_t)(int); typedef void * (*nvapi_QueryInterface_t)(int);
typedef int (*NvAPI_EnumPhysicalGPUs_t)(void *, void *); typedef int (*NvAPI_EnumPhysicalGPUs_t)(void *, void *);
typedef int (*NvAPI_GPU_SetForcePstate_t)(void *, int, int); typedef int (*NvAPI_GPU_SetForcePstate_t)(void *, int, int);
typedef int (*NvAPI_Initialize_t)(); typedef int (*NvAPI_Initialize_t)();
typedef int (*NvAPI_Unload_t)(); typedef int (*NvAPI_Unload_t)();
/////
static bool load_success;
static void * lib;
static nvapi_QueryInterface_t nvapi_QueryInterface; static nvapi_QueryInterface_t nvapi_QueryInterface;
static NvAPI_EnumPhysicalGPUs_t NvAPI_EnumPhysicalGPUs; static NvAPI_EnumPhysicalGPUs_t NvAPI_EnumPhysicalGPUs;
static NvAPI_GPU_SetForcePstate_t NvAPI_GPU_SetForcePstate; static NvAPI_GPU_SetForcePstate_t NvAPI_GPU_SetForcePstate;
@ -43,7 +50,7 @@ static std::set<int> parse_visible_devices() {
std::string item; std::string item;
// iterate over the comma-separated device IDs in the environment variable // iterate over the comma-separated device IDs in the environment variable
while (std::getline(ss, item, ",")) { while (std::getline(ss, item, ',')) {
try { try {
// convert the current item to an integer and insert it into the set // convert the current item to an integer and insert it into the set
devices.insert(std::stoi(item)); devices.insert(std::stoi(item));
@ -97,10 +104,12 @@ void nvapi_init() {
} }
// initialize the NVAPI library // initialize the NVAPI library
if (NvAPI_Initialize) {
if (NvAPI_Initialize()) { if (NvAPI_Initialize()) {
load_success = true; load_success = true;
} }
} }
}
void nvapi_free() { void nvapi_free() {
// if the library was successfully initialized, unload it // if the library was successfully initialized, unload it
@ -118,8 +127,13 @@ void nvapi_free() {
} }
// reset the pointers and flags // reset the pointers and flags
lib = nullptr;
load_success = false; load_success = false;
lib = nullptr;
nvapi_QueryInterface = nullptr;
NvAPI_EnumPhysicalGPUs = nullptr;
NvAPI_GPU_SetForcePstate = nullptr;
NvAPI_Initialize = nullptr;
NvAPI_Unload = nullptr;
} }
void nvapi_set_pstate(int pstate) { void nvapi_set_pstate(int pstate) {
@ -146,13 +160,13 @@ void nvapi_set_pstate(int pstate) {
// iterate over each GPU // iterate over each GPU
for (int i = 0; i < gpu_count; i++) { for (int i = 0; i < gpu_count; i++) {
// if the set of visible devices is not empty and the current GPU ID is not in this set, skip this iteration // if the set of visible devices is not empty and the current GPU ID is not in this set, skip this iteration
if (!devices.empty() && !devices.find(i)) { if (!devices.empty() && devices.find(i) == devices.end()) {
continue; continue;
} }
// attempt to set the performance state for the current GPU // attempt to set the performance state for the current GPU
if (NvAPI_GPU_SetForcePstate(gpu_array[gpu_id], pstate, 2) != 0) { if (NvAPI_GPU_SetForcePstate(gpu_array[i], pstate, 2) != 0) {
fprintf(stderr, "Failed to set performance state for gpu #%d\n", gpu_id); fprintf(stderr, "Failed to set performance state for gpu #%d\n", i);
} }
} }
} }