ggml : add support for dynamic loading of backends (#10469)

* ggml : add support for dynamic loading of backends

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Diego Devesa 2024-11-25 15:13:39 +01:00 committed by GitHub
parent f6d12e7df8
commit 5931c1f233
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
44 changed files with 728 additions and 272 deletions

View file

@ -541,16 +541,12 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
return &ggml_backend_cpu_device;
}
// One name/value pair describing a single backend feature.
// In this file the CPU backend builds a list of these with entries such as
// { "AVX", "1" } and ends the list with a { nullptr, nullptr } entry.
struct ggml_backend_feature {
const char * name;  // feature identifier string, e.g. "SSE3" or "AVX2"
const char * value; // feature value as a string; the entries visible here all use "1"
};
// Not used yet
// This is intended to replace the ggml_cpu_has_* functions when loading the CPU backend dynamically,
// and additionally to allow other backends to expose their own list of features that applications can query using the same API.
// and additionally to allow other backends to expose their own list of features that applications can query using the same API
static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
static std::vector<ggml_backend_feature> features = []() {
ggml_cpu_init();
std::vector<ggml_backend_feature> features;
if (ggml_cpu_has_sse3()) {
features.push_back({ "SSE3", "1" });
@ -561,6 +557,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
if (ggml_cpu_has_avx()) {
features.push_back({ "AVX", "1" });
}
if (ggml_cpu_has_avx_vnni()) {
features.push_back({ "AVX_VNNI", "1" });
}
if (ggml_cpu_has_avx2()) {
features.push_back({ "AVX2", "1" });
}
@ -570,9 +569,6 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
if (ggml_cpu_has_fma()) {
features.push_back({ "FMA", "1" });
}
if (ggml_cpu_has_avx_vnni()) {
features.push_back({ "AVX_VNNI", "1" });
}
if (ggml_cpu_has_avx512()) {
features.push_back({ "AVX512", "1" });
}
@ -619,6 +615,10 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
if (ggml_cpu_has_llamafile()) {
features.push_back({ "LLAMAFILE", "1" });
}
// TODO: rename this
#ifdef GGML_USE_CPU_AARCH64
features.push_back({ "AARCH64_REPACK", "1" });
#endif
features.push_back({ nullptr, nullptr });
@ -637,6 +637,29 @@ static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const ch
if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
return (void *)ggml_backend_cpu_get_extra_bufts;
}
if (strcmp(name, "ggml_backend_get_features") == 0) {
return (void *)ggml_backend_cpu_get_features;
}
if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
return (void *)ggml_backend_cpu_set_abort_callback;
}
if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
return (void *)ggml_numa_init;
}
if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
return (void *)ggml_is_numa;
}
// threadpool - TODO: move to ggml-base
if (strcmp(name, "ggml_threadpool_new") == 0) {
return (void *)ggml_threadpool_new;
}
if (strcmp(name, "ggml_threadpool_free") == 0) {
return (void *)ggml_threadpool_free;
}
if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
return (void *)ggml_backend_cpu_set_threadpool;
}
return NULL;
@ -655,9 +678,12 @@ ggml_backend_reg_t ggml_backend_cpu_reg(void) {
ggml_cpu_init();
static struct ggml_backend_reg ggml_backend_cpu_reg = {
/* .iface = */ ggml_backend_cpu_reg_i,
/* .context = */ NULL,
/* .api_version = */ GGML_BACKEND_API_VERSION,
/* .iface = */ ggml_backend_cpu_reg_i,
/* .context = */ NULL,
};
return &ggml_backend_cpu_reg;
}
GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)