From 5b7c57528232a7d3c9525541a38ab43e2cd143e8 Mon Sep 17 00:00:00 2001 From: huafengchun Date: Wed, 17 Jul 2024 08:16:21 +0000 Subject: [PATCH] Add logging for CANN backend --- examples/llava/clip.cpp | 2 +- ggml/include/ggml-cann.h | 11 +++++ ggml/src/ggml-cann.cpp | 93 +++++++++++++++++++++++++++++++++------- src/llama.cpp | 4 ++ 4 files changed, 94 insertions(+), 16 deletions(-) diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 457118187..d23e282fb 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -1007,7 +1007,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { #ifdef GGML_USE_CANN new_clip->backend = ggml_backend_cann_init(0); - printf("%s: CLIP using CANN backend\n", __func__); + LOG_TEE("%s: CLIP using CANN backend\n", __func__); #endif diff --git a/ggml/include/ggml-cann.h b/ggml/include/ggml-cann.h index cc83ed710..ca73211fe 100644 --- a/ggml/include/ggml-cann.h +++ b/ggml/include/ggml-cann.h @@ -109,6 +109,17 @@ GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device, size_t* free, size_t* total); +/** + * @brief Set the logging callback for GGML. + * + * This function sets the logging callback and user data for logging. + * + * @param log_callback The logging callback to set. + * @param user_data User data to pass to the logging callback. + */ +GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback, + void* user_data); + #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-cann.cpp b/ggml/src/ggml-cann.cpp index 5f43d2a2c..9bf7e332a 100644 --- a/ggml/src/ggml-cann.cpp +++ b/ggml/src/ggml-cann.cpp @@ -23,6 +23,7 @@ #include "ggml-cann.h" #include +#include #include #include @@ -37,6 +38,69 @@ #include "ggml-common.h" +/** + * @brief Default logging callback for GGML. + * + * This function is the default logging callback that logs messages to stderr. + * + * @param level The log level. + * @param msg The log message. + * @param user_data User data passed to the callback. + */ +static void ggml_cann_default_log_callback(enum ggml_log_level level, + const char* msg, void* user_data) { + GGML_UNUSED(level); + GGML_UNUSED(user_data); + fprintf(stderr, "%s", msg); +} + +ggml_log_callback ggml_cann_log_callback = ggml_cann_default_log_callback; +void* ggml_cann_log_user_data = NULL; + +GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback, + void* user_data) { + ggml_cann_log_callback = log_callback; + ggml_cann_log_user_data = user_data; +} + +#define GGML_CANN_LOG_INFO(...) ggml_cann_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__) +#define GGML_CANN_LOG_WARN(...) ggml_cann_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__) +#define GGML_CANN_LOG_ERROR(...) \ + ggml_cann_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) + +GGML_ATTRIBUTE_FORMAT(2, 3) + +/** + * @brief Log a message using the current logging callback. + * + * This function formats a log message and passes it to the current logging + * callback. + * + * @param level The log level. + * @param format The format string for the log message. + * @param ... The arguments for the format string. + */ +static void ggml_cann_log(enum ggml_log_level level, const char* format, ...) { + if (ggml_cann_log_callback != NULL) { + va_list args; + va_start(args, format); + char buffer[128]; + int len = vsnprintf(buffer, 128, format, args); + if (len < 128) { + ggml_cann_log_callback(level, buffer, ggml_cann_log_user_data); + } else { + // vsnprintf adds a null terminator + std::vector buffer2(len + 1); + va_end(args); + va_start(args, format); + vsnprintf(&buffer2[0], buffer2.size(), format, args); + ggml_cann_log_callback(level, buffer2.data(), + ggml_cann_log_user_data); + } + va_end(args); + } +} + /** * @brief Handles CANN errors by printing an error message and aborting. * @@ -51,10 +115,10 @@ int32_t id = -1; aclrtGetDevice(&id); - fprintf(stderr, "CANN error: %s\n", msg); - fprintf(stderr, " current device: %d, in function %s at %s:%d\n", id, func, + GGML_CANN_LOG_ERROR("CANN error: %s\n", msg); + GGML_CANN_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func, file, line); - fprintf(stderr, " %s\n", stmt); + GGML_CANN_LOG_ERROR(" %s\n", stmt); // abort with GGML_ASSERT to get a stack trace GGML_ASSERT(!"CANN error"); } @@ -100,7 +164,7 @@ static ggml_cann_device_info ggml_cann_init() { aclError err = aclrtGetDeviceCount((uint32_t*)&info.device_count); if (err != ACL_SUCCESS) { - fprintf(stderr, "%s: failed to initialize CANN: %s\n", + GGML_CANN_LOG_ERROR("%s: failed to initialize CANN: %s\n", __func__, aclGetRecentErrMsg()); return info; } @@ -138,7 +202,7 @@ const ggml_cann_device_info& ggml_cann_info() { return info; } -// #define DEBUG_CANN_MALLOC +//#define DEBUG_CANN_MALLOC /** * @brief A pool of CANN buffers(legacy). * @@ -250,7 +314,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool { *actual_size = look_ahead_size; pool_size += look_ahead_size; #ifdef DEBUG_CANN_MALLOC - printf( + GGML_CANN_LOG_INFO( "%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, " "requested %u MB\n", __func__, device, nnz, (uint32_t)(max_size / 1024 / 1024), @@ -405,8 +469,8 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { // add to the pool pool_size += reserve_size; - // printf("cann pool[%d]: size increased to %llu MB (reserved %llu - // MB)\n", + // GGML_CANN_LOG_INFO("cann pool[%d]: size increased to %llu MB ( + // reserved %llu MB)\n", // device, (unsigned long long) (pool_size/1024/1024), // (unsigned long long) (reserve_size/1024/1024)); } @@ -418,7 +482,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { pool_used += size; #ifdef DEBUG_CANN_MALLOC - printf("cann pool[%d]: allocated %llu bytes at %llx\n", device, + GGML_CANN_LOG_INFO("cann pool[%d]: allocated %llu bytes at %llx\n", device, (unsigned long long)size, (unsigned long long)ptr); #endif return ptr; @@ -432,7 +496,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { */ void free(void* ptr, size_t size) override { #ifdef DEBUG_CANN_MALLOC - printf("cann pool[%d]: freed %llu bytes at %llx\n", device, + GGML_CANN_LOG_INFO("cann pool[%d]: freed %llu bytes at %llx\n", device, (unsigned long long)size, (unsigned long long)ptr); #endif @@ -1034,8 +1098,7 @@ ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, void* dev_ptr; aclError err = aclrtMalloc(&dev_ptr, size, ACL_MEM_MALLOC_HUGE_FIRST); if (err != ACL_SUCCESS) { - fprintf( - stderr, + GGML_CANN_LOG_ERROR( "%s: allocating %.2f MiB on device %d: aclrtMalloc failed: %s\n", __func__, size / 1024.0 / 1024.0, buft_ctx->device, aclGetRecentErrMsg()); @@ -1569,7 +1632,7 @@ GGML_CALL static enum ggml_status ggml_backend_cann_graph_compute( bool ok = ggml_cann_compute_forward(*cann_ctx, node); if (!ok) { - fprintf(stderr, "%s: error: op not supported %s (%s)\n", __func__, + GGML_CANN_LOG_ERROR("%s: error: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op)); } GGML_ASSERT(ok); @@ -1873,13 +1936,13 @@ static ggml_guid_t ggml_backend_cann_guid() { GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device) { aclInit(nullptr); if (device < 0 || device >= ggml_backend_cann_get_device_count()) { - fprintf(stderr, "%s: error: invalid device %d\n", __func__, device); + GGML_CANN_LOG_ERROR("%s: error: invalid device %d\n", __func__, device); return nullptr; } ggml_backend_cann_context* ctx = new ggml_backend_cann_context(device); if (ctx == nullptr) { - fprintf(stderr, "%s: error: failed to allocate context\n", __func__); + GGML_CANN_LOG_ERROR("%s: error: failed to allocate context\n", __func__); return nullptr; } diff --git a/src/llama.cpp b/src/llama.cpp index 13c5f10c5..ed387d480 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -2081,6 +2081,8 @@ struct llama_state { ggml_backend_metal_log_set_callback(log_callback, log_callback_user_data); #elif defined(GGML_USE_CUDA) ggml_backend_cuda_log_set_callback(log_callback, log_callback_user_data); +#elif defined(GGML_USE_CANN) + ggml_backend_cann_log_set_callback(log_callback, log_callback_user_data); #endif } @@ -21826,6 +21828,8 @@ void llama_log_set(ggml_log_callback log_callback, void * user_data) { ggml_backend_metal_log_set_callback(g_state.log_callback, g_state.log_callback_user_data); #elif defined(GGML_USE_CUDA) ggml_backend_cuda_log_set_callback(g_state.log_callback, g_state.log_callback_user_data); +#elif defined(GGML_USE_CANN) + ggml_backend_cann_log_set_callback(g_state.log_callback, g_state.log_callback_user_data); #endif }