ggml : automatic selection of best CPU backend (#10606)
* ggml : automatic selection of best CPU backend * amx : minor opt * add GGML_AVX_VNNI to enable avx-vnni, fix checks
This commit is contained in:
parent
86dc11c5bc
commit
3420909dff
12 changed files with 599 additions and 156 deletions
|
@ -2,8 +2,13 @@
|
|||
#include "ggml-backend.h"
|
||||
#include "ggml-impl.h"
|
||||
#include <algorithm>
|
||||
#include <codecvt>
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <locale>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#ifdef _WIN32
|
||||
|
@ -57,9 +62,71 @@
|
|||
#include "ggml-kompute.h"
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
using dl_handle = std::remove_pointer_t<HMODULE>;
|
||||
|
||||
struct dl_handle_deleter {
|
||||
void operator()(HMODULE handle) {
|
||||
FreeLibrary(handle);
|
||||
}
|
||||
};
|
||||
|
||||
static dl_handle * dl_load_library(const std::wstring & path) {
|
||||
// suppress error dialogs for missing DLLs
|
||||
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||
|
||||
HMODULE handle = LoadLibraryW(path.c_str());
|
||||
|
||||
SetErrorMode(old_mode);
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
static dl_handle * dl_load_library(const std::string & path) {
|
||||
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
||||
return dl_load_library(converter.from_bytes(path));
|
||||
}
|
||||
|
||||
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
||||
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||
|
||||
void * p = (void *) GetProcAddress(handle, name);
|
||||
|
||||
SetErrorMode(old_mode);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
using dl_handle = void;
|
||||
|
||||
struct dl_handle_deleter {
|
||||
void operator()(void * handle) {
|
||||
dlclose(handle);
|
||||
}
|
||||
};
|
||||
|
||||
static void * dl_load_library(const std::string & path) {
|
||||
dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
||||
return dlsym(handle, name);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
||||
|
||||
struct ggml_backend_reg_entry {
|
||||
ggml_backend_reg_t reg;
|
||||
void * handle;
|
||||
dl_handle_ptr handle;
|
||||
};
|
||||
|
||||
struct ggml_backend_registry {
|
||||
|
@ -97,13 +164,16 @@ struct ggml_backend_registry {
|
|||
}
|
||||
|
||||
~ggml_backend_registry() {
|
||||
while (!backends.empty()) {
|
||||
// use silent since the log system may have been destroyed at this point
|
||||
unload_backend(backends.back().reg, true);
|
||||
// FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
|
||||
// since backend threads may still be running and accessing resources from the dynamic library
|
||||
for (auto & entry : backends) {
|
||||
if (entry.handle) {
|
||||
entry.handle.release(); // NOLINT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void register_backend(ggml_backend_reg_t reg, void * handle = nullptr) {
|
||||
void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
|
||||
if (!reg) {
|
||||
return;
|
||||
}
|
||||
|
@ -112,7 +182,7 @@ struct ggml_backend_registry {
|
|||
GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
|
||||
__func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
|
||||
#endif
|
||||
backends.push_back({ reg, handle });
|
||||
backends.push_back({ reg, std::move(handle) });
|
||||
for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
|
||||
register_device(ggml_backend_reg_dev_get(reg, i));
|
||||
}
|
||||
|
@ -126,79 +196,53 @@ struct ggml_backend_registry {
|
|||
}
|
||||
|
||||
ggml_backend_reg_t load_backend(const char * path, bool silent) {
|
||||
#ifdef _WIN32
|
||||
// suppress error dialogs for missing DLLs
|
||||
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||
|
||||
HMODULE handle = LoadLibraryA(path);
|
||||
|
||||
dl_handle_ptr handle { dl_load_library(path) };
|
||||
if (!handle) {
|
||||
if (!silent) {
|
||||
GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, GetLastError());
|
||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
|
||||
}
|
||||
SetErrorMode(old_mode);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ggml_backend_init_t backend_init = (ggml_backend_init_t) GetProcAddress(handle, "ggml_backend_init");
|
||||
|
||||
SetErrorMode(old_mode);
|
||||
|
||||
if (!backend_init) {
|
||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||
if (score_fn && score_fn() == 0) {
|
||||
if (!silent) {
|
||||
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
|
||||
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
|
||||
}
|
||||
FreeLibrary(handle);
|
||||
return nullptr;
|
||||
}
|
||||
#else
|
||||
void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
|
||||
|
||||
if (!handle) {
|
||||
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
||||
if (!backend_init_fn) {
|
||||
if (!silent) {
|
||||
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
|
||||
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto * backend_init = (ggml_backend_init_t) dlsym(handle, "ggml_backend_init");
|
||||
|
||||
if (!backend_init) {
|
||||
if (!silent) {
|
||||
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %s\n", __func__, path, dlerror());
|
||||
}
|
||||
dlclose(handle);
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
ggml_backend_reg_t reg = backend_init();
|
||||
|
||||
ggml_backend_reg_t reg = backend_init_fn();
|
||||
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
||||
if (!silent) {
|
||||
if (!reg) {
|
||||
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
|
||||
} else {
|
||||
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
||||
__func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
|
||||
__func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
|
||||
}
|
||||
}
|
||||
#ifdef _WIN32
|
||||
FreeLibrary(handle);
|
||||
#else
|
||||
dlclose(handle);
|
||||
#endif
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
|
||||
register_backend(reg, handle);
|
||||
|
||||
register_backend(reg, std::move(handle));
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
void unload_backend(ggml_backend_reg_t reg, bool silent) {
|
||||
auto it = std::find_if(backends.begin(), backends.end(),
|
||||
[reg](ggml_backend_reg_entry entry) { return entry.reg == reg; });
|
||||
[reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
|
||||
|
||||
if (it == backends.end()) {
|
||||
if (!silent) {
|
||||
|
@ -217,15 +261,6 @@ struct ggml_backend_registry {
|
|||
[reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
|
||||
devices.end());
|
||||
|
||||
// unload library
|
||||
if (it->handle) {
|
||||
#ifdef _WIN32
|
||||
FreeLibrary((HMODULE) it->handle);
|
||||
#else
|
||||
dlclose(it->handle);
|
||||
#endif
|
||||
}
|
||||
|
||||
// remove backend
|
||||
backends.erase(it);
|
||||
}
|
||||
|
@ -341,12 +376,7 @@ void ggml_backend_unload(ggml_backend_reg_t reg) {
|
|||
get_reg().unload_backend(reg, true);
|
||||
}
|
||||
|
||||
void ggml_backend_load_all() {
|
||||
std::vector<std::string> search_prefix;
|
||||
|
||||
// add the executable directory to the search path
|
||||
// FIXME: this is convenient for development, but it should probably be disabled in production
|
||||
|
||||
static std::string get_executable_path() {
|
||||
#if defined(__APPLE__)
|
||||
// get executable path
|
||||
std::vector<char> path;
|
||||
|
@ -364,7 +394,7 @@ void ggml_backend_load_all() {
|
|||
if (last_slash != std::string::npos) {
|
||||
base_path = base_path.substr(0, last_slash);
|
||||
}
|
||||
search_prefix.push_back(base_path + "/");
|
||||
return base_path + "/";
|
||||
#elif defined(__linux__)
|
||||
std::string base_path = ".";
|
||||
std::vector<char> path(1024);
|
||||
|
@ -386,38 +416,104 @@ void ggml_backend_load_all() {
|
|||
path.resize(path.size() * 2);
|
||||
}
|
||||
|
||||
search_prefix.push_back(base_path + "/");
|
||||
return base_path + "/";
|
||||
#elif defined(_WIN32)
|
||||
std::vector<char> path(MAX_PATH);
|
||||
DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
|
||||
if (len == 0) {
|
||||
return "";
|
||||
}
|
||||
std::string base_path(path.data(), len);
|
||||
// remove executable name
|
||||
auto last_slash = base_path.find_last_of('\\');
|
||||
if (last_slash != std::string::npos) {
|
||||
base_path = base_path.substr(0, last_slash);
|
||||
}
|
||||
return base_path + "\\";
|
||||
#endif
|
||||
}
|
||||
|
||||
auto & reg = get_reg();
|
||||
|
||||
auto try_load = [&](const std::string & name) {
|
||||
std::string os_name;
|
||||
static std::string backend_filename_prefix() {
|
||||
#ifdef _WIN32
|
||||
os_name = "ggml-" + name + ".dll";
|
||||
return "ggml-";
|
||||
#else
|
||||
os_name = "libggml-" + name + ".so";
|
||||
return "libggml-";
|
||||
#endif
|
||||
if (reg.load_backend(os_name.c_str(), true)) {
|
||||
return;
|
||||
}
|
||||
|
||||
static std::string backend_filename_suffix() {
|
||||
#ifdef _WIN32
|
||||
return ".dll";
|
||||
#else
|
||||
return ".so";
|
||||
#endif
|
||||
}
|
||||
|
||||
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent) {
|
||||
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
||||
// TODO: search system paths
|
||||
std::vector<std::string> search_paths = { "./", get_executable_path() };
|
||||
std::string file_prefix = backend_filename_prefix() + name + "-";
|
||||
|
||||
int best_score = 0;
|
||||
std::string best_path;
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
for (const auto & search_path : search_paths) {
|
||||
if (!fs::exists(search_path)) {
|
||||
continue;
|
||||
}
|
||||
for (const auto & prefix : search_prefix) {
|
||||
if (reg.load_backend((prefix + os_name).c_str(), true)) {
|
||||
return;
|
||||
for (const auto & entry : fs::directory_iterator(search_path)) {
|
||||
if (entry.is_regular_file()) {
|
||||
std::string filename = entry.path().filename().string();
|
||||
std::string ext = entry.path().extension().string();
|
||||
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
||||
dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
|
||||
if (!handle && !silent) {
|
||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
|
||||
}
|
||||
if (handle) {
|
||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||
if (score_fn) {
|
||||
int s = score_fn();
|
||||
#ifndef NDEBUG
|
||||
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
|
||||
#endif
|
||||
if (s > best_score) {
|
||||
best_score = s;
|
||||
best_path = entry.path().string();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
try_load("amx");
|
||||
try_load("blas");
|
||||
try_load("cann");
|
||||
try_load("cuda");
|
||||
try_load("hip");
|
||||
try_load("kompute");
|
||||
try_load("metal");
|
||||
try_load("rpc");
|
||||
try_load("sycl");
|
||||
try_load("vulkan");
|
||||
try_load("musa");
|
||||
try_load("cpu");
|
||||
if (best_score == 0) {
|
||||
// try to load the base backend
|
||||
for (const auto & search_path : search_paths) {
|
||||
std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
|
||||
if (fs::exists(path)) {
|
||||
return get_reg().load_backend(path.c_str(), silent);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return get_reg().load_backend(best_path.c_str(), silent);
|
||||
}
|
||||
|
||||
void ggml_backend_load_all() {
|
||||
ggml_backend_load_best("blas", true);
|
||||
ggml_backend_load_best("cann", true);
|
||||
ggml_backend_load_best("cuda", true);
|
||||
ggml_backend_load_best("hip", true);
|
||||
ggml_backend_load_best("kompute", true);
|
||||
ggml_backend_load_best("metal", true);
|
||||
ggml_backend_load_best("rpc", true);
|
||||
ggml_backend_load_best("sycl", true);
|
||||
ggml_backend_load_best("vulkan", true);
|
||||
ggml_backend_load_best("musa", true);
|
||||
ggml_backend_load_best("cpu", true);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue