diff --git a/ggml/include/ggml-cann.h b/ggml/include/ggml-cann.h
index 2ec0a26ed..528975493 100644
--- a/ggml/include/ggml-cann.h
+++ b/ggml/include/ggml-cann.h
@@ -33,7 +33,6 @@ extern "C" {
  * @brief Maximum number of CANN devices supported.
  */
 #define GGML_CANN_MAX_DEVICES 16
-#define GGML_CANN_NAME "CANN"
 
 GGML_API ggml_backend_reg_t ggml_backend_cann_reg(void);
 
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index 8440f66f5..7d7b63a15 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -588,14 +588,13 @@ struct ggml_backend_registry {
 #ifdef GGML_USE_RPC
         register_backend(ggml_backend_rpc_reg());
 #endif
-
 #ifdef GGML_USE_AMX
         register_backend(ggml_backend_amx_reg());
 #endif
-
 #ifdef GGML_USE_CANN
         register_backend(ggml_backend_cann_reg());
 #endif
+
         // TODO: kompute
 
         register_backend(ggml_backend_cpu_reg());
diff --git a/ggml/src/ggml-cann.cpp b/ggml/src/ggml-cann.cpp
index 3ebea6702..af0fb603a 100644
--- a/ggml/src/ggml-cann.cpp
+++ b/ggml/src/ggml-cann.cpp
@@ -39,6 +39,8 @@
 
 #include "ggml-common.h"
 
+#define GGML_CANN_NAME "CANN"
+
 /**
  * @brief Handles CANN errors by printing an error message and aborting.
  *
diff --git a/src/llama.cpp b/src/llama.cpp
index 6d9696a2e..297de81f9 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -10,8 +10,6 @@
 
 #if defined(GGML_USE_KOMPUTE)
 #   include "ggml-kompute.h"
-#elif defined(GGML_USE_CANN)
-#   include "ggml-cann.h"
 #endif
 
 #ifndef __AMX_INT8__
@@ -3416,11 +3414,7 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(const llama_mode
         }
     }
 
-#if defined(GGML_USE_CANN)
-    if (host_buffer) {
-        buft = ggml_backend_cann_host_buffer_type();
-    }
-#elif defined(GGML_USE_CPU_HBM)
+#if defined(GGML_USE_CPU_HBM)
     buft = ggml_backend_cpu_hbm_buffer_type();
 #endif
 
@@ -3442,8 +3436,6 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(const llama_
 
 #if defined(GGML_USE_KOMPUTE)
     buft = ggml_backend_kompute_buffer_type(device);
-#elif defined(GGML_USE_CANN)
-    buft = ggml_backend_cann_buffer_type(device);
 #endif
 
     if (buft == nullptr) {
@@ -3487,14 +3479,13 @@ static size_t llama_get_device_memory(const llama_model & model, int device) {
         return free;
     }
 
-#if defined(GGML_USE_CANN)
-    size_t total;
-    size_t free;
-    ggml_backend_cann_get_device_memory(device, &free, &total);
-    return free;
-#else
+    if (model.devices.size() > 0) {
+        ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(model.devices[0]);
+        LLAMA_LOG_WARN("%s: failed to get free memory of device %d of backend %s: device id is out of range\n", __func__, device, ggml_backend_reg_name(reg));
+    } else {
+        LLAMA_LOG_WARN("%s: failed to get free memory: model has no devices\n", __func__);
+    }
     return 1;
-#endif
 
     GGML_UNUSED(model);
     GGML_UNUSED(device);
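
For context, a minimal standalone sketch (not part of the patch) of the path this change relies on: with the CANN-specific `#ifdef` removed from `llama_get_device_memory`, free memory for any registered backend device, CANN included, is expected to come through the generic device API in ggml-backend.h. `example_model` and `example_get_device_memory` are hypothetical names used for illustration; the sketch assumes `model.devices` holds `ggml_backend_dev_t` handles and uses `ggml_backend_dev_memory()`, mirroring the in-range path above the last hunk.

// Hypothetical sketch, not part of the patch: with the CANN-specific branch
// removed, free device memory comes from the generic ggml backend device API
// for any backend registered via ggml_backend_registry (CANN included).
#include <cstddef>
#include <cstdio>
#include <vector>

#include "ggml-backend.h"

// stand-in for the relevant field of llama_model (assumption for illustration)
struct example_model {
    std::vector<ggml_backend_dev_t> devices;
};

static size_t example_get_device_memory(const example_model & model, int device) {
    if (device >= 0 && device < (int) model.devices.size()) {
        size_t free, total;
        // generic query; CANN devices answer this once ggml_backend_cann_reg()
        // has registered them with the backend registry
        ggml_backend_dev_memory(model.devices[device], &free, &total);
        return free;
    }
    // out-of-range device id: warn and return a 1-byte fallback, as the patch does
    fprintf(stderr, "device %d is out of range\n", device);
    return 1;
}

The design point of the patch is the same throughout: once a backend registers itself through ggml_backend_registry, llama.cpp no longer needs backend-specific `#ifdef` branches, so CANN is served by the same generic buffer-type and device-memory paths as the other registered backends.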