ggml : add support for dynamic loading of backends (#10469)
* ggml : add support for dynamic loading of backends --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
f6d12e7df8
commit
5931c1f233
44 changed files with 728 additions and 272 deletions
|
@ -1,14 +1,13 @@
|
|||
add_library(ggml-cpu
|
||||
ggml-cpu.c
|
||||
ggml-cpu.cpp
|
||||
ggml-cpu-aarch64.c
|
||||
ggml-cpu-aarch64.h
|
||||
ggml-cpu-quants.c
|
||||
ggml-cpu-quants.h
|
||||
)
|
||||
ggml_add_backend_library(ggml-cpu
|
||||
ggml-cpu.c
|
||||
ggml-cpu.cpp
|
||||
ggml-cpu-aarch64.c
|
||||
ggml-cpu-aarch64.h
|
||||
ggml-cpu-quants.c
|
||||
ggml-cpu-quants.h
|
||||
)
|
||||
|
||||
target_link_libraries(ggml-cpu PRIVATE ggml-base)
|
||||
target_include_directories(ggml-cpu PRIVATE . ..)
|
||||
target_include_directories(ggml-cpu PRIVATE .)
|
||||
|
||||
if (APPLE AND GGML_ACCELERATE)
|
||||
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
||||
|
|
|
@ -13578,29 +13578,6 @@ static void ggml_graph_compute_kickoff(struct ggml_threadpool * threadpool, int
|
|||
|
||||
#endif // GGML_USE_OPENMP
|
||||
|
||||
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
|
||||
p->n_threads = n_threads;
|
||||
p->prio = 0; // default priority (usually means normal or inherited)
|
||||
p->poll = 50; // hybrid-polling enabled
|
||||
p->strict_cpu = false; // no strict placement (all threads share same cpumask)
|
||||
p->paused = false; // threads are ready to go
|
||||
memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
|
||||
}
|
||||
|
||||
struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
|
||||
struct ggml_threadpool_params p;
|
||||
ggml_threadpool_params_init(&p, n_threads);
|
||||
return p;
|
||||
}
|
||||
|
||||
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
|
||||
if (p0->n_threads != p1->n_threads ) return false;
|
||||
if (p0->prio != p1->prio ) return false;
|
||||
if (p0->poll != p1->poll ) return false;
|
||||
if (p0->strict_cpu != p1->strict_cpu ) return false;
|
||||
return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
|
||||
}
|
||||
|
||||
static struct ggml_threadpool * ggml_threadpool_new_impl(
|
||||
struct ggml_threadpool_params * tpp,
|
||||
struct ggml_cgraph * cgraph,
|
||||
|
|
|
@ -541,16 +541,12 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
|
|||
return &ggml_backend_cpu_device;
|
||||
}
|
||||
|
||||
struct ggml_backend_feature {
|
||||
const char * name;
|
||||
const char * value;
|
||||
};
|
||||
|
||||
// Not used yet
|
||||
// This is intended to replace the the ggml_cpu_has_* functions when loading the CPU backend dynamically,
|
||||
// and additionally to allow other backends to expose their own list of features that applications can query using the same API.
|
||||
// and additionally to allow other backends to expose their own list of features that applications can query using the same API
|
||||
static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
|
||||
static std::vector<ggml_backend_feature> features = []() {
|
||||
ggml_cpu_init();
|
||||
|
||||
std::vector<ggml_backend_feature> features;
|
||||
if (ggml_cpu_has_sse3()) {
|
||||
features.push_back({ "SSE3", "1" });
|
||||
|
@ -561,6 +557,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|||
if (ggml_cpu_has_avx()) {
|
||||
features.push_back({ "AVX", "1" });
|
||||
}
|
||||
if (ggml_cpu_has_avx_vnni()) {
|
||||
features.push_back({ "AVX_VNNI", "1" });
|
||||
}
|
||||
if (ggml_cpu_has_avx2()) {
|
||||
features.push_back({ "AVX2", "1" });
|
||||
}
|
||||
|
@ -570,9 +569,6 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|||
if (ggml_cpu_has_fma()) {
|
||||
features.push_back({ "FMA", "1" });
|
||||
}
|
||||
if (ggml_cpu_has_avx_vnni()) {
|
||||
features.push_back({ "AVX_VNNI", "1" });
|
||||
}
|
||||
if (ggml_cpu_has_avx512()) {
|
||||
features.push_back({ "AVX512", "1" });
|
||||
}
|
||||
|
@ -619,6 +615,10 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|||
if (ggml_cpu_has_llamafile()) {
|
||||
features.push_back({ "LLAMAFILE", "1" });
|
||||
}
|
||||
// TODO: rename this
|
||||
#ifdef GGML_USE_CPU_AARCH64
|
||||
features.push_back({ "AARCH64_REPACK", "1" });
|
||||
#endif
|
||||
|
||||
features.push_back({ nullptr, nullptr });
|
||||
|
||||
|
@ -637,6 +637,29 @@ static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const ch
|
|||
if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
|
||||
return (void *)ggml_backend_cpu_get_extra_bufts;
|
||||
}
|
||||
if (strcmp(name, "ggml_backend_get_features") == 0) {
|
||||
return (void *)ggml_backend_cpu_get_features;
|
||||
}
|
||||
if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
|
||||
return (void *)ggml_backend_cpu_set_abort_callback;
|
||||
}
|
||||
if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
|
||||
return (void *)ggml_numa_init;
|
||||
}
|
||||
if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
|
||||
return (void *)ggml_is_numa;
|
||||
}
|
||||
|
||||
// threadpool - TODO: move to ggml-base
|
||||
if (strcmp(name, "ggml_threadpool_new") == 0) {
|
||||
return (void *)ggml_threadpool_new;
|
||||
}
|
||||
if (strcmp(name, "ggml_threadpool_free") == 0) {
|
||||
return (void *)ggml_threadpool_free;
|
||||
}
|
||||
if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
|
||||
return (void *)ggml_backend_cpu_set_threadpool;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
||||
|
@ -655,9 +678,12 @@ ggml_backend_reg_t ggml_backend_cpu_reg(void) {
|
|||
ggml_cpu_init();
|
||||
|
||||
static struct ggml_backend_reg ggml_backend_cpu_reg = {
|
||||
/* .iface = */ ggml_backend_cpu_reg_i,
|
||||
/* .context = */ NULL,
|
||||
/* .api_version = */ GGML_BACKEND_API_VERSION,
|
||||
/* .iface = */ ggml_backend_cpu_reg_i,
|
||||
/* .context = */ NULL,
|
||||
};
|
||||
|
||||
return &ggml_backend_cpu_reg;
|
||||
}
|
||||
|
||||
GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue