[CANN] Adapt to dynamically loadable backends mechanism (#9970)

* [CANN] Adapt to dynamically loadable backends mechanism

* Fix the bug: inference output is garbled when running in debug mode for LM models whose type is in the Q4_0 class

* Address the review comments on this pull request
This commit is contained in:
leo-pony 2024-10-22 16:16:01 +08:00 committed by GitHub
parent 674804a996
commit 6b8447352d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 267 additions and 149 deletions

View file

@ -561,6 +561,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
# include "ggml-amx.h"
#endif
#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif
struct ggml_backend_registry {
std::vector<ggml_backend_reg_t> backends;
std::vector<ggml_backend_dev_t> devices;
@ -587,8 +591,11 @@ struct ggml_backend_registry {
#ifdef GGML_USE_AMX
register_backend(ggml_backend_amx_reg());
#endif
#ifdef GGML_USE_CANN
register_backend(ggml_backend_cann_reg());
#endif
// TODO: kompute, cann
// TODO: kompute
register_backend(ggml_backend_cpu_reg());
}