From 5b7c57528232a7d3c9525541a38ab43e2cd143e8 Mon Sep 17 00:00:00 2001
From: huafengchun <huafengchun@gmail.com>
Date: Wed, 17 Jul 2024 08:16:21 +0000
Subject: [PATCH] Add logging for CANN backend

---
 examples/llava/clip.cpp  |  2 +-
 ggml/include/ggml-cann.h | 11 +++++
 ggml/src/ggml-cann.cpp   | 93 +++++++++++++++++++++++++++++++++-------
 src/llama.cpp            |  4 ++
 4 files changed, 94 insertions(+), 16 deletions(-)
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 457118187..d23e282fb 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -1007,7 +1007,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
 
 #ifdef GGML_USE_CANN
     new_clip->backend = ggml_backend_cann_init(0);
-    printf("%s: CLIP using CANN backend\n", __func__);
+    LOG_TEE("%s: CLIP using CANN backend\n", __func__);
 #endif
 
 
diff --git a/ggml/include/ggml-cann.h b/ggml/include/ggml-cann.h
index cc83ed710..ca73211fe 100644
--- a/ggml/include/ggml-cann.h
+++ b/ggml/include/ggml-cann.h
@@ -109,6 +109,17 @@ GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
                                                             size_t* free,
                                                             size_t* total);
 
+/**
+ * @brief Set the logging callback for GGML.
+ *
+ * This function sets the logging callback and user data for logging.
+ *
+ * @param log_callback The logging callback to set.
+ * @param user_data User data to pass to the logging callback.
+ */
+GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
+                                                 void* user_data);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/ggml/src/ggml-cann.cpp b/ggml/src/ggml-cann.cpp
index 5f43d2a2c..9bf7e332a 100644
--- a/ggml/src/ggml-cann.cpp
+++ b/ggml/src/ggml-cann.cpp
@@ -23,6 +23,7 @@
 #include "ggml-cann.h"
 
 #include <acl/acl.h>
+#include <stdarg.h>
 
 #include <cmath>
 #include <cstdio>
@@ -37,6 +38,69 @@
 
 #include "ggml-common.h"
 
+/**
+ * @brief Default logging callback for GGML.
+ *
+ * This function is the default logging callback that logs messages to stderr.
+ *
+ * @param level The log level.
+ * @param msg The log message.
+ * @param user_data User data passed to the callback.
+ */
+static void ggml_cann_default_log_callback(enum ggml_log_level level,
+                                           const char* msg, void* user_data) {
+    GGML_UNUSED(level);
+    GGML_UNUSED(user_data);
+    fprintf(stderr, "%s", msg);
+}
+
+ggml_log_callback ggml_cann_log_callback = ggml_cann_default_log_callback;
+void* ggml_cann_log_user_data = NULL;
+
+GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
+                                                 void* user_data) {
+    ggml_cann_log_callback = log_callback;
+    ggml_cann_log_user_data = user_data;
+}
+
+#define GGML_CANN_LOG_INFO(...) ggml_cann_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
+#define GGML_CANN_LOG_WARN(...) ggml_cann_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
+#define GGML_CANN_LOG_ERROR(...) \
+    ggml_cann_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
+
+GGML_ATTRIBUTE_FORMAT(2, 3)
+
+/**
+ * @brief Log a message using the current logging callback.
+ *
+ * This function formats a log message and passes it to the current logging
+ * callback.
+ *
+ * @param level The log level.
+ * @param format The format string for the log message.
+ * @param ... The arguments for the format string.
+ */
+static void ggml_cann_log(enum ggml_log_level level, const char* format, ...) {
+    if (ggml_cann_log_callback != NULL) {
+        va_list args;
+        va_start(args, format);
+        char buffer[128];
+        int len = vsnprintf(buffer, 128, format, args);
+        if (len < 128) {
+            ggml_cann_log_callback(level, buffer, ggml_cann_log_user_data);
+        } else {
+             // vsnprintf adds a null terminator
+            std::vector<char> buffer2(len + 1);
+            va_end(args);
+            va_start(args, format);
+            vsnprintf(&buffer2[0], buffer2.size(), format, args);
+            ggml_cann_log_callback(level, buffer2.data(),
+                                   ggml_cann_log_user_data);
+        }
+        va_end(args);
+    }
+}
+
 /**
  * @brief Handles CANN errors by printing an error message and aborting.
  *
@@ -51,10 +115,10 @@
     int32_t id = -1;
     aclrtGetDevice(&id);
 
-    fprintf(stderr, "CANN error: %s\n", msg);
-    fprintf(stderr, "  current device: %d, in function %s at %s:%d\n", id, func,
+    GGML_CANN_LOG_ERROR("CANN error: %s\n", msg);
+    GGML_CANN_LOG_ERROR("  current device: %d, in function %s at %s:%d\n", id, func,
             file, line);
-    fprintf(stderr, "  %s\n", stmt);
+    GGML_CANN_LOG_ERROR("  %s\n", stmt);
     // abort with GGML_ASSERT to get a stack trace
     GGML_ASSERT(!"CANN error");
 }
@@ -100,7 +164,7 @@ static ggml_cann_device_info ggml_cann_init() {
     aclError err = aclrtGetDeviceCount((uint32_t*)&info.device_count);
 
     if (err != ACL_SUCCESS) {
-        fprintf(stderr, "%s: failed to initialize CANN: %s\n",
+        GGML_CANN_LOG_ERROR("%s: failed to initialize CANN: %s\n",
                 __func__, aclGetRecentErrMsg());
         return info;
     }
@@ -138,7 +202,7 @@ const ggml_cann_device_info& ggml_cann_info() {
     return info;
 }
 
-// #define DEBUG_CANN_MALLOC
+//#define DEBUG_CANN_MALLOC
 /**
  * @brief A pool of CANN buffers(legacy).
  *
@@ -250,7 +314,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
         *actual_size = look_ahead_size;
         pool_size += look_ahead_size;
 #ifdef DEBUG_CANN_MALLOC
-        printf(
+        GGML_CANN_LOG_INFO(
             "%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, "
             "requested %u MB\n",
             __func__, device, nnz, (uint32_t)(max_size / 1024 / 1024),
@@ -405,8 +469,8 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
             // add to the pool
             pool_size += reserve_size;
 
-            // printf("cann pool[%d]: size increased to %llu MB (reserved %llu
-            // MB)\n",
+            // GGML_CANN_LOG_INFO("cann pool[%d]: size increased to %llu MB (
+            // reserved %llu MB)\n",
             //       device, (unsigned long long) (pool_size/1024/1024),
             //       (unsigned long long) (reserve_size/1024/1024));
         }
@@ -418,7 +482,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
         pool_used += size;
 
 #ifdef DEBUG_CANN_MALLOC
-        printf("cann pool[%d]: allocated %llu bytes at %llx\n", device,
+        GGML_CANN_LOG_INFO("cann pool[%d]: allocated %llu bytes at %llx\n", device,
                (unsigned long long)size, (unsigned long long)ptr);
 #endif
         return ptr;
@@ -432,7 +496,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
      */
     void free(void* ptr, size_t size) override {
 #ifdef DEBUG_CANN_MALLOC
-        printf("cann pool[%d]: freed %llu bytes at %llx\n", device,
+        GGML_CANN_LOG_INFO("cann pool[%d]: freed %llu bytes at %llx\n", device,
                (unsigned long long)size, (unsigned long long)ptr);
 #endif
 
@@ -1034,8 +1098,7 @@ ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
     void* dev_ptr;
     aclError err = aclrtMalloc(&dev_ptr, size, ACL_MEM_MALLOC_HUGE_FIRST);
     if (err != ACL_SUCCESS) {
-        fprintf(
-            stderr,
+        GGML_CANN_LOG_ERROR(
             "%s: allocating %.2f MiB on device %d: aclrtMalloc failed: %s\n",
             __func__, size / 1024.0 / 1024.0, buft_ctx->device,
             aclGetRecentErrMsg());
@@ -1569,7 +1632,7 @@ GGML_CALL static enum ggml_status ggml_backend_cann_graph_compute(
         bool ok = ggml_cann_compute_forward(*cann_ctx, node);
 
         if (!ok) {
-            fprintf(stderr, "%s: error: op not supported %s (%s)\n", __func__,
+            GGML_CANN_LOG_ERROR("%s: error: op not supported %s (%s)\n", __func__,
                     node->name, ggml_op_name(node->op));
         }
         GGML_ASSERT(ok);
@@ -1873,13 +1936,13 @@ static ggml_guid_t ggml_backend_cann_guid() {
 GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device) {
     aclInit(nullptr);
     if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
-        fprintf(stderr, "%s: error: invalid device %d\n", __func__, device);
+        GGML_CANN_LOG_ERROR("%s: error: invalid device %d\n", __func__, device);
         return nullptr;
     }
 
     ggml_backend_cann_context* ctx = new ggml_backend_cann_context(device);
     if (ctx == nullptr) {
-        fprintf(stderr, "%s: error: failed to allocate context\n", __func__);
+        GGML_CANN_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
         return nullptr;
     }
 
diff --git a/src/llama.cpp b/src/llama.cpp
index 13c5f10c5..ed387d480 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2081,6 +2081,8 @@ struct llama_state {
         ggml_backend_metal_log_set_callback(log_callback, log_callback_user_data);
 #elif defined(GGML_USE_CUDA)
         ggml_backend_cuda_log_set_callback(log_callback, log_callback_user_data);
+#elif defined(GGML_USE_CANN)
+        ggml_backend_cann_log_set_callback(log_callback, log_callback_user_data);
 #endif
     }
 
@@ -21826,6 +21828,8 @@ void llama_log_set(ggml_log_callback log_callback, void * user_data) {
     ggml_backend_metal_log_set_callback(g_state.log_callback, g_state.log_callback_user_data);
 #elif defined(GGML_USE_CUDA)
     ggml_backend_cuda_log_set_callback(g_state.log_callback, g_state.log_callback_user_data);
+#elif defined(GGML_USE_CANN)
+    ggml_backend_cann_log_set_callback(g_state.log_callback, g_state.log_callback_user_data);
 #endif
 }