ggml : add support for dynamic loading of backends (#10469)
* ggml : add support for dynamic loading of backends --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
f6d12e7df8
commit
5931c1f233
44 changed files with 728 additions and 272 deletions
|
@ -3126,6 +3126,61 @@ static ggml_backend_dev_t ggml_backend_cuda_reg_get_device(ggml_backend_reg_t re
|
|||
return ctx->devices[index];
|
||||
}
|
||||
|
||||
// Reports the compile-time configuration of the CUDA backend.
//
// Returns a pointer to an array of { name, value } string pairs, terminated by a
// { nullptr, nullptr } entry. An entry is present only when the corresponding
// preprocessor macro was defined when this file was compiled.
//
// The array is the storage of a function-local static vector that is built on the
// first call, so the returned pointer refers to the same storage on every call and
// must not be freed or modified by the caller.
//
// reg is not used.
static ggml_backend_feature * ggml_backend_cuda_get_features(ggml_backend_reg_t reg) {
    GGML_UNUSED(reg);

    static std::vector<ggml_backend_feature> features = []() {
        std::vector<ggml_backend_feature> list;

        // Two-level expansion so that the argument is macro-expanded before it is
        // turned into a string literal. Variadic so that a value containing commas
        // is still accepted as a single argument.
        // The names carry a GGML_CUDA_ prefix: an identifier starting with an
        // underscore followed by an uppercase letter is reserved, and an unprefixed
        // STRINGIFY could collide with (and, through the #undef below, remove) a
        // macro of the same name defined elsewhere in the translation unit.
    #define GGML_CUDA_STRINGIFY_IMPL(...) #__VA_ARGS__
    #define GGML_CUDA_STRINGIFY(...) GGML_CUDA_STRINGIFY_IMPL(__VA_ARGS__)

    #ifdef __CUDA_ARCH_LIST__
        list.push_back({ "ARCHS", GGML_CUDA_STRINGIFY(__CUDA_ARCH_LIST__) });
    #endif

    #ifdef GGML_CUDA_FORCE_MMQ
        list.push_back({ "FORCE_MMQ", "1" });
    #endif

    #ifdef GGML_CUDA_FORCE_CUBLAS
        list.push_back({ "FORCE_CUBLAS", "1" });
    #endif

    #ifdef GGML_CUDA_NO_VMM
        list.push_back({ "NO_VMM", "1" });
    #endif

    #ifdef GGML_CUDA_NO_PEER_COPY
        list.push_back({ "NO_PEER_COPY", "1" });
    #endif

    #ifdef GGML_CUDA_F16
        list.push_back({ "F16", "1" });
    #endif

    #ifdef GGML_CUDA_USE_GRAPHS
        list.push_back({ "USE_GRAPHS", "1" });
    #endif

    #ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
        list.push_back({ "PEER_MAX_BATCH_SIZE", GGML_CUDA_STRINGIFY(GGML_CUDA_PEER_MAX_BATCH_SIZE) });
    #endif

    #ifdef GGML_CUDA_FA_ALL_QUANTS
        list.push_back({ "FA_ALL_QUANTS", "1" });
    #endif

    #undef GGML_CUDA_STRINGIFY
    #undef GGML_CUDA_STRINGIFY_IMPL

        // sentinel entry marking the end of the list
        list.push_back({ nullptr, nullptr });

        return list;
    }();

    return features.data();
}
|
||||
|
||||
static void * ggml_backend_cuda_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
||||
GGML_UNUSED(reg);
|
||||
if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
|
||||
|
@ -3137,6 +3192,9 @@ static void * ggml_backend_cuda_reg_get_proc_address(ggml_backend_reg_t reg, con
|
|||
if (strcmp(name, "ggml_backend_unregister_host_buffer") == 0) {
|
||||
return (void *)ggml_backend_cuda_unregister_host_buffer;
|
||||
}
|
||||
if (strcmp(name, "ggml_backend_get_features") == 0) {
|
||||
return (void *)ggml_backend_cuda_get_features;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -3169,16 +3227,17 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
|||
dev_ctx->description = prop.name;
|
||||
|
||||
ggml_backend_dev_t dev = new ggml_backend_device {
|
||||
/* .interface = */ ggml_backend_cuda_device_interface,
|
||||
/* .reg = */ &reg,
|
||||
/* .context = */ dev_ctx
|
||||
/* .iface = */ ggml_backend_cuda_device_interface,
|
||||
/* .reg = */ &reg,
|
||||
/* .context = */ dev_ctx
|
||||
};
|
||||
ctx->devices.push_back(dev);
|
||||
}
|
||||
|
||||
reg = ggml_backend_reg {
|
||||
/* .interface = */ ggml_backend_cuda_reg_interface,
|
||||
/* .context = */ ctx
|
||||
/* .api_version = */ GGML_BACKEND_API_VERSION,
|
||||
/* .iface = */ ggml_backend_cuda_reg_interface,
|
||||
/* .context = */ ctx
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -3209,3 +3268,5 @@ ggml_backend_t ggml_backend_cuda_init(int device) {
|
|||
|
||||
return cuda_backend;
|
||||
}
|
||||
|
||||
// NOTE(review): the macro definition is not visible here; presumably it emits the
// entry point(s) through which this backend's registration function
// (ggml_backend_cuda_reg) is found when the backend is loaded dynamically —
// confirm against the definition of GGML_BACKEND_DL_IMPL.
GGML_BACKEND_DL_IMPL(ggml_backend_cuda_reg)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue