ggml : automatic selection of best CPU backend (#10606)

* ggml : automatic selection of best CPU backend

* amx : minor opt

* add GGML_AVX_VNNI to enable avx-vnni, fix checks
This commit is contained in:
Diego Devesa 2024-12-01 16:12:41 +01:00 committed by GitHub
parent 86dc11c5bc
commit 3420909dff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 599 additions and 156 deletions

View file

@ -2,8 +2,13 @@
#include "ggml-backend.h"
#include "ggml-impl.h"
#include <algorithm>
#include <codecvt>
#include <cstring>
#include <filesystem>
#include <locale>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>
#ifdef _WIN32
@ -57,9 +62,71 @@
#include "ggml-kompute.h"
#endif
#ifdef _WIN32
using dl_handle = std::remove_pointer_t<HMODULE>;
// Windows deleter functor: frees a module handle with FreeLibrary.
struct dl_handle_deleter {
    void operator()(HMODULE module) {
        FreeLibrary(module);
    }
};
// Loads the DLL located at the wide-character `path`.
// Returns whatever LoadLibraryW returns (NULL on failure).
static dl_handle * dl_load_library(const std::wstring & path) {
    // keep the system from showing error dialogs for missing DLLs while loading
    const DWORD saved_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(saved_mode | SEM_FAILCRITICALERRORS);

    const HMODULE module = LoadLibraryW(path.c_str());

    // put back the error mode that was active on entry
    SetErrorMode(saved_mode);

    return module;
}
// Loads the DLL located at the UTF-8 encoded `path`.
//
// The path is converted to UTF-16 and forwarded to the std::wstring overload.
// Returns nullptr when the path is empty, is not valid UTF-8, or the library
// cannot be loaded. Unlike std::wstring_convert (deprecated since C++17), the
// conversion never throws, so malformed input cannot raise an exception here.
static dl_handle * dl_load_library(const std::string & path) {
    // MultiByteToWideChar takes an int length; reject anything that would not fit
    if (path.empty() || path.size() > 0x7fffffffu) {
        return nullptr;
    }
    const int utf8_len = (int) path.size();

    // first call: query the number of UTF-16 code units needed
    const int wide_len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path.data(), utf8_len, nullptr, 0);
    if (wide_len <= 0) {
        return nullptr;
    }

    // second call: perform the conversion into the sized buffer
    std::wstring wide_path((size_t) wide_len, L'\0');
    const int written = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path.data(), utf8_len, &wide_path[0], wide_len);
    if (written != wide_len) {
        return nullptr;
    }

    return dl_load_library(wide_path);
}
// Looks up the exported symbol `name` in the module `handle`.
// Returns the address reported by GetProcAddress (NULL when not found).
static void * dl_get_sym(dl_handle * handle, const char * name) {
    // same error-mode handling as when loading a library
    const DWORD saved_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(saved_mode | SEM_FAILCRITICALERRORS);

    void * const symbol = reinterpret_cast<void *>(GetProcAddress(handle, name));

    SetErrorMode(saved_mode);

    return symbol;
}
#else
using dl_handle = void;
// POSIX deleter functor: closes a library handle with dlclose.
struct dl_handle_deleter {
    void operator()(void * lib) {
        dlclose(lib);
    }
};
static void * dl_load_library(const std::string & path) {
dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
return handle;
}
// Looks up the symbol `name` in the library `handle` via dlsym.
static void * dl_get_sym(dl_handle * handle, const char * name) {
    void * const symbol = dlsym(handle, name);
    return symbol;
}
#endif
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
// A registered backend together with the dynamic library it came from.
struct ggml_backend_reg_entry {
    // backend registration object
    ggml_backend_reg_t reg;
    // owning handle of the backend's dynamic library; null when the backend
    // was registered without a library handle
    dl_handle_ptr handle;
};
struct ggml_backend_registry {
@ -97,13 +164,16 @@ struct ggml_backend_registry {
}
// Destructor: intentionally leaves every dynamically loaded backend library
// loaded by giving up ownership of its handle instead of closing it.
~ggml_backend_registry() {
    // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
    // since backend threads may still be running and accessing resources from the dynamic library
    for (auto & entry : backends) {
        if (entry.handle) {
            // release() drops ownership without invoking the deleter
            entry.handle.release(); // NOLINT
        }
    }
}
void register_backend(ggml_backend_reg_t reg, void * handle = nullptr) {
void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
if (!reg) {
return;
}
@ -112,7 +182,7 @@ struct ggml_backend_registry {
GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
__func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
#endif
backends.push_back({ reg, handle });
backends.push_back({ reg, std::move(handle) });
for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
register_device(ggml_backend_reg_dev_get(reg, i));
}
@ -126,79 +196,53 @@ struct ggml_backend_registry {
}
// Loads a backend from the dynamic library at `path` and registers it.
//
// path:   path of the dynamic library to load
// silent: when true, failures are not logged
//
// Returns the backend registration on success. Returns nullptr when the path
// is null, the library cannot be loaded, the library reports a score of 0
// (not supported on this system), the ggml_backend_init entry point is
// missing, or the backend returns NULL / an incompatible API version.
// The library handle is owned by a dl_handle_ptr until it is handed to
// register_backend, so every early return lets the smart pointer dispose of it.
ggml_backend_reg_t load_backend(const char * path, bool silent) {
    // a null path cannot be converted to a string or printed with %s
    if (!path) {
        if (!silent) {
            GGML_LOG_ERROR("%s: no library path given\n", __func__);
        }
        return nullptr;
    }

    dl_handle_ptr handle { dl_load_library(path) };
    if (!handle) {
        if (!silent) {
            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
        }
        return nullptr;
    }

    // the score function is optional; a score of 0 means the backend cannot run here
    auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
    if (score_fn && score_fn() == 0) {
        if (!silent) {
            GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
        }
        return nullptr;
    }

    auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
    if (!backend_init_fn) {
        if (!silent) {
            GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
        }
        return nullptr;
    }

    ggml_backend_reg_t reg = backend_init_fn();
    if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
        if (!silent) {
            if (!reg) {
                GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
            } else {
                GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
                    __func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
            }
        }
        return nullptr;
    }

    GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);

    // ownership of the library handle moves into the registry entry
    register_backend(reg, std::move(handle));

    return reg;
}
void unload_backend(ggml_backend_reg_t reg, bool silent) {
auto it = std::find_if(backends.begin(), backends.end(),
[reg](ggml_backend_reg_entry entry) { return entry.reg == reg; });
[reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
if (it == backends.end()) {
if (!silent) {
@ -217,15 +261,6 @@ struct ggml_backend_registry {
[reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
devices.end());
// unload library
if (it->handle) {
#ifdef _WIN32
FreeLibrary((HMODULE) it->handle);
#else
dlclose(it->handle);
#endif
}
// remove backend
backends.erase(it);
}
@ -341,12 +376,7 @@ void ggml_backend_unload(ggml_backend_reg_t reg) {
get_reg().unload_backend(reg, true);
}
void ggml_backend_load_all() {
std::vector<std::string> search_prefix;
// add the executable directory to the search path
// FIXME: this is convenient for development, but it should probably be disabled in production
static std::string get_executable_path() {
#if defined(__APPLE__)
// get executable path
std::vector<char> path;
@ -364,7 +394,7 @@ void ggml_backend_load_all() {
if (last_slash != std::string::npos) {
base_path = base_path.substr(0, last_slash);
}
search_prefix.push_back(base_path + "/");
return base_path + "/";
#elif defined(__linux__)
std::string base_path = ".";
std::vector<char> path(1024);
@ -386,38 +416,104 @@ void ggml_backend_load_all() {
path.resize(path.size() * 2);
}
search_prefix.push_back(base_path + "/");
return base_path + "/";
#elif defined(_WIN32)
std::vector<char> path(MAX_PATH);
DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
if (len == 0) {
return "";
}
std::string base_path(path.data(), len);
// remove executable name
auto last_slash = base_path.find_last_of('\\');
if (last_slash != std::string::npos) {
base_path = base_path.substr(0, last_slash);
}
return base_path + "\\";
#endif
}
// Returns the platform-specific prefix of backend library file names:
// "ggml-" on Windows, "libggml-" everywhere else.
static std::string backend_filename_prefix() {
#ifdef _WIN32
    return "ggml-";
#else
    return "libggml-";
#endif
}
// Returns the platform-specific extension of backend library file names:
// ".dll" on Windows, ".so" everywhere else.
static std::string backend_filename_suffix() {
#ifdef _WIN32
    static const char * const extension = ".dll";
#else
    static const char * const extension = ".so";
#endif
    return extension;
}
// Loads the best available variant of the backend `name`.
//
// Every regular file matching [lib]ggml-<name>-*.[so|dll] in the search paths
// (current directory and executable directory) is opened and, if it exports
// ggml_backend_score, scored; the variant with the highest positive score is
// loaded through the registry. When no variant scores above 0, the base
// library [lib]ggml-<name>.[so|dll] is tried instead.
//
// name:   backend name used to build the file names
// silent: when true, load failures are not logged
//
// Returns the registration of the loaded backend, or nullptr if nothing
// could be loaded. Filesystem queries use the std::error_code overloads, so
// unreadable or vanished directories are skipped rather than throwing.
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent) {
    namespace fs = std::filesystem;

    // TODO: search system paths
    const std::vector<std::string> search_paths = { "./", get_executable_path() };
    const std::string file_prefix = backend_filename_prefix() + name + "-";
    const std::string file_suffix = backend_filename_suffix();

    int best_score = 0;
    std::string best_path;

    for (const auto & search_path : search_paths) {
        std::error_code ec;
        // a missing, empty or unreadable search path sets ec and is skipped
        fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied, ec);
        if (ec) {
            continue;
        }

        const fs::directory_iterator dir_end;
        for (; dir_it != dir_end && !ec; dir_it.increment(ec)) {
            const fs::directory_entry & entry = *dir_it;

            std::error_code type_ec;
            if (!entry.is_regular_file(type_ec)) {
                continue;
            }

            const std::string filename = entry.path().filename().string();
            const std::string ext      = entry.path().extension().string();
            // anchored comparison: only the start of the file name is inspected
            if (filename.compare(0, file_prefix.size(), file_prefix) != 0 || ext != file_suffix) {
                continue;
            }

            // the library is opened only to query its score; the handle is
            // disposed of again at the end of this iteration
            dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
            if (!handle) {
                if (!silent) {
                    GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
                }
                continue;
            }

            auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
            if (!score_fn) {
                continue;
            }

            const int s = score_fn();
#ifndef NDEBUG
            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
#endif
            if (s > best_score) {
                best_score = s;
                best_path  = entry.path().string();
            }
        }
    }

    if (best_score == 0) {
        // try to load the base backend
        for (const auto & search_path : search_paths) {
            const std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
            std::error_code ec;
            if (fs::exists(path, ec)) {
                return get_reg().load_backend(path.c_str(), silent);
            }
        }
        return nullptr;
    }

    return get_reg().load_backend(best_path.c_str(), silent);
}
// Attempts to load the best variant of every known backend, silently
// ignoring the ones that are not available.
void ggml_backend_load_all() {
    // load order is significant to callers only insofar as it is preserved here
    static const char * const backend_names[] = {
        "blas",
        "cann",
        "cuda",
        "hip",
        "kompute",
        "metal",
        "rpc",
        "sycl",
        "vulkan",
        "musa",
        "cpu",
    };

    for (const char * backend_name : backend_names) {
        ggml_backend_load_best(backend_name, true);
    }
}