ggml : add support for dynamic loading of backends (#10469)
* ggml : add support for dynamic loading of backends --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
f6d12e7df8
commit
5931c1f233
44 changed files with 728 additions and 272 deletions
|
@ -16,7 +16,6 @@
|
|||
|
||||
|
||||
#include <ggml.h>
|
||||
#include <ggml-cpu.h>
|
||||
#include <ggml-alloc.h>
|
||||
#include <ggml-backend.h>
|
||||
|
||||
|
@ -26,7 +25,6 @@
|
|||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <cinttypes>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <stdio.h>
|
||||
|
@ -639,19 +637,20 @@ struct test_case {
|
|||
|
||||
// determine number of runs
|
||||
int n_runs;
|
||||
bool is_cpu = ggml_backend_dev_type(ggml_backend_get_device(backend)) == GGML_BACKEND_DEVICE_TYPE_CPU;
|
||||
if (op_flops(out) > 0) {
|
||||
// based on flops
|
||||
const uint64_t GFLOP = 1000 * 1000 * 1000;
|
||||
const uint64_t target_flops_cpu = 8ULL * GFLOP;
|
||||
const uint64_t target_flops_gpu = 100ULL * GFLOP;
|
||||
uint64_t target_flops = ggml_backend_is_cpu(backend) ? target_flops_cpu : target_flops_gpu;
|
||||
uint64_t target_flops = is_cpu ? target_flops_cpu : target_flops_gpu;
|
||||
n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_flops / op_flops(out)) + 1;
|
||||
} else {
|
||||
// based on memory size
|
||||
const size_t GB = 1ULL << 30;
|
||||
const size_t target_size_cpu = 8 * GB;
|
||||
const size_t target_size_gpu = 32 * GB;
|
||||
size_t target_size = ggml_backend_is_cpu(backend) ? target_size_cpu : target_size_gpu;
|
||||
size_t target_size = is_cpu ? target_size_cpu : target_size_gpu;
|
||||
n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_size / op_size(out)) + 1;
|
||||
}
|
||||
|
||||
|
@ -3873,7 +3872,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|||
static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_name) {
|
||||
if (mode == MODE_TEST) {
|
||||
auto test_cases = make_test_cases_eval();
|
||||
ggml_backend_t backend_cpu = ggml_backend_cpu_init();
|
||||
ggml_backend_t backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
|
||||
if (backend_cpu == NULL) {
|
||||
printf(" Failed to initialize CPU backend\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t n_ok = 0;
|
||||
for (auto & test : test_cases) {
|
||||
|
@ -3953,7 +3956,9 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
// enumerate backends
|
||||
// load and enumerate backends
|
||||
ggml_backend_load_all();
|
||||
|
||||
printf("Testing %zu devices\n\n", ggml_backend_dev_count());
|
||||
|
||||
size_t n_ok = 0;
|
||||
|
@ -3969,16 +3974,15 @@ int main(int argc, char ** argv) {
|
|||
continue;
|
||||
}
|
||||
|
||||
ggml_backend_t backend = ggml_backend_dev_init(dev, NULL);
|
||||
GGML_ASSERT(backend != NULL);
|
||||
|
||||
if (backend_filter == NULL && ggml_backend_is_cpu(backend) && mode != MODE_GRAD) {
|
||||
if (backend_filter == NULL && ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU && mode != MODE_GRAD) {
|
||||
printf(" Skipping CPU backend\n");
|
||||
ggml_backend_free(backend);
|
||||
n_ok++;
|
||||
continue;
|
||||
}
|
||||
|
||||
ggml_backend_t backend = ggml_backend_dev_init(dev, NULL);
|
||||
GGML_ASSERT(backend != NULL);
|
||||
|
||||
ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
|
||||
auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
|
||||
if (ggml_backend_set_n_threads_fn) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue