From 63dc587dffae40c0cd7f1468859f2d430039a29e Mon Sep 17 00:00:00 2001
From: hongruichen
Date: Wed, 17 Jul 2024 13:34:05 +0800
Subject: [PATCH] refactoring: make the buffer alloc and free stay in same class

---
 ggml/src/ggml-qnn.cpp       | 88 +++++++++++++++++++++----------------
 ggml/src/ggml-qnn/utils.cpp | 21 ++++++++-
 ggml/src/ggml-qnn/utils.hpp |  3 ++
 3 files changed, 74 insertions(+), 38 deletions(-)

diff --git a/ggml/src/ggml-qnn.cpp b/ggml/src/ggml-qnn.cpp
index 46f7e64bc..46fdf87a6 100644
--- a/ggml/src/ggml-qnn.cpp
+++ b/ggml/src/ggml-qnn.cpp
@@ -83,22 +83,54 @@ static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
     ggml_backend_qnn_context(QNN_BACKEND_NPU, 1, "qnn-npu", "libQnnHtp.so"), /* QNN_BACKEND_NPU */
 };
 
-struct ggml_backend_qnn_buffer_context {
-    ggml_backend_qnn_buffer_context(size_t device) : device(device), name(QNN_BACKEND_NAME + std::to_string(device)) {}
+class ggml_backend_qnn_buffer_context {
+public:
+    ggml_backend_qnn_buffer_context(QNNBackend device, std::shared_ptr<qnn::qnn_instance> instance, size_t size) :
+        _device(device), _instance(instance), _name(QNN_BACKEND_NAME + std::to_string(device)) {
 
-    ~ggml_backend_qnn_buffer_context() {
-        tensors.clear();
-        if (buffer) {
-            free(buffer);
+        size_t size_page = sysconf(_SC_PAGESIZE);
+
+        // TODO: for qnn npu, a better way here is to reuse the buffer allocated by qnn rpc, will save an extra copy
+        _buffer = qnn::align_alloc(size_page, size);
+
+        if (!_buffer) {
+            QNN_LOG_WARN("failed to allocate %.2f MiB\n", float(size / (1 << 20)));
+            return;
         }
+
+        _buffer_size = size;
     }
 
-    void *buffer = nullptr;
-    struct ggml_backend_qnn_context *backend_ctx = nullptr;
-    std::list<std::unique_ptr<qnn::ggml_qnn_tensor>> tensors;
-    size_t buffer_size = 0;
-    size_t device;
-    std::string name;
+    ~ggml_backend_qnn_buffer_context() {
+        _tensors.clear();
+
+        // the free will do nothing if the _buffer is nullptr
+        qnn::align_free(_buffer);
+    }
+
+    bool is_valid() const { return _buffer != nullptr; }
+
+    bool init_tensor(ggml_tensor *tensor) {
+        auto qnn_tensor = std::make_unique<qnn::ggml_qnn_tensor>(tensor, _device, _instance);
+        if (!qnn_tensor->is_valid()) {
+            QNN_LOG_WARN("Create ggml_qnn_tensor failed");
+            return false;
+        }
+
+        _tensors.push_back(std::move(qnn_tensor));
+        return true;
+    }
+
+    void *get_buffer() { return _buffer; }
+    size_t get_buffer_size() { return _buffer_size; }
+
+private:
+    QNNBackend _device;
+    std::shared_ptr<qnn::qnn_instance> _instance;
+    std::string _name;
+    std::list<std::unique_ptr<qnn::ggml_qnn_tensor>> _tensors;
+    void *_buffer = nullptr;
+    size_t _buffer_size = 0;
 };
 
 struct ggml_backend_qnn_buffer_type_context {
@@ -189,20 +221,16 @@ GGML_CALL static void ggml_backend_qnn_buffer_free_buffer(ggml_backend_buffer_t
 GGML_CALL static void *ggml_backend_qnn_buffer_get_base(ggml_backend_buffer_t buffer) {
     ggml_backend_qnn_buffer_context *ctx = (ggml_backend_qnn_buffer_context *)buffer->context;
 
-    return ctx->buffer;
+    return ctx->get_buffer();
 }
 
 GGML_CALL static void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor) {
     ggml_backend_qnn_buffer_context *ctx = (ggml_backend_qnn_buffer_context *)buffer->context;
 
-    auto instance = ctx->backend_ctx->instance;
-    auto qnn_tensor = std::make_unique<qnn::ggml_qnn_tensor>(tensor, (QNNBackend)(ctx->device), instance);
-    if (!qnn_tensor->is_valid()) {
-        QNN_LOG_WARN("Create ggml_qnn_tensor failed");
+    if (!ctx->init_tensor(tensor)) {
+        QNN_LOG_WARN("init ggml_qnn_tensor failed");
         return;
     }
-
-    ctx->tensors.push_back(std::move(qnn_tensor));
 }
 
 GGML_CALL static void ggml_backend_qnn_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor,
@@ -232,7 +260,7 @@ GGML_CALL static bool ggml_backend_qnn_buffer_cpy_tensor(ggml_backend_buffer_t b
 GGML_CALL static void ggml_backend_qnn_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
     ggml_backend_qnn_buffer_context *ctx = (ggml_backend_qnn_buffer_context *)buffer->context;
 
-    memset(ctx->buffer, value, ctx->buffer_size);
+    memset(ctx->get_buffer(), value, ctx->get_buffer_size());
 }
 
 static ggml_backend_buffer_i ggml_backend_qnn_buffer_interface = {
@@ -263,23 +291,9 @@ static void *ggml_qnn_host_malloc(size_t n) {
 GGML_CALL static ggml_backend_buffer_t ggml_backend_qnn_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
                                                                                  size_t size) {
     ggml_backend_qnn_buffer_type_context *buft_ctx = (ggml_backend_qnn_buffer_type_context *)buft->context;
-    ggml_backend_qnn_buffer_context *ctx = new ggml_backend_qnn_buffer_context(buft_ctx->device);
-
-    size_t size_page = sysconf(_SC_PAGESIZE);
-
-    size_t size_aligned = size;
-    if ((size_aligned % size_page) != 0) {
-        size_aligned += (size_page - (size_aligned % size_page));
-    }
-
-    // TODO:use pre-allocated buffer in internal memory pool
-    ctx->buffer = ggml_qnn_host_malloc(size_aligned);
-    ctx->buffer_size = size_aligned;
-
-    ctx->backend_ctx = &g_qnn_mgr[buft_ctx->device];
-
-    if (nullptr == ctx->buffer) {
-        QNN_LOG_WARN("%s: failed to allocate %.2f MiB\n", __func__, size / (1 << 20));
+    ggml_backend_qnn_buffer_context *ctx =
+        new ggml_backend_qnn_buffer_context((QNNBackend)buft_ctx->device, g_qnn_mgr[buft_ctx->device].instance, size);
+    if (!ctx->is_valid()) {
         return nullptr;
     }
 
diff --git a/ggml/src/ggml-qnn/utils.cpp b/ggml/src/ggml-qnn/utils.cpp
index 7c25314f7..2b594bfa0 100644
--- a/ggml/src/ggml-qnn/utils.cpp
+++ b/ggml/src/ggml-qnn/utils.cpp
@@ -1,6 +1,8 @@
 #include "utils.hpp"
 
+#include <cstdlib>
+
 #include "ggml-qnn.h"
 
 #include "qnn-types.hpp"
@@ -111,7 +113,7 @@ const char *get_htparch_desc(size_t htp_arch) {
 intptr_t align_to(size_t alignment, intptr_t offset) {
     return offset % alignment == 0
                ? offset
-               : offset + (static_cast<intptr_t>(alignment) - offset % static_cast<intptr_t>(alignment));
+               : offset + (static_cast<intptr_t>(alignment) - (offset % static_cast<intptr_t>(alignment)));
 }
 
 uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor) {
@@ -127,6 +129,23 @@ uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor) {
     return ggml_nbytes(tensor);
 }
 
+void *align_alloc(size_t alignment, size_t size) {
+    size_t size_aligned = size;
+    if ((size_aligned % alignment) != 0) {
+        size_aligned += (alignment - (size_aligned % alignment));
+    }
+
+    void *data = std::aligned_alloc(alignment, size_aligned);
+    if (!data) {
+        QNN_LOG_WARN("aligned_alloc failed\n");
+        return nullptr;
+    }
+
+    return data;
+}
+
+void align_free(void *ptr) { std::free(ptr); }
+
 // =================================================================================================
 //
 //  QNN backend internal helper functions
diff --git a/ggml/src/ggml-qnn/utils.hpp b/ggml/src/ggml-qnn/utils.hpp
index 66c3eeba4..b264f2326 100644
--- a/ggml/src/ggml-qnn/utils.hpp
+++ b/ggml/src/ggml-qnn/utils.hpp
@@ -22,6 +22,9 @@ const char *get_htparch_desc(size_t htp_arch);
 intptr_t align_to(size_t alignment, intptr_t offset);
 uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor);
 
+void *align_alloc(size_t alignment, size_t size);
+void align_free(void *ptr);
+
 const char *opname_from_ggmlop(enum ggml_op ggmlop);
 
 inline int validate_tensor_version(const Qnn_Tensor_t &tensor) {
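Usage note: the new qnn::align_alloc / qnn::align_free helpers are a matched pair. align_alloc rounds the requested size up to a multiple of the alignment (std::aligned_alloc requires the size to be such a multiple) and align_free simply forwards to std::free, which is a no-op for a null pointer, so the destructor can free unconditionally. The snippet below is a minimal, self-contained sketch of that contract as ggml_backend_qnn_buffer_context uses it; the helpers are re-implemented locally under a hypothetical demo namespace so it compiles on its own, and it is an illustration only, not part of the patch.

// Sketch of the align_alloc/align_free contract introduced by this patch.
// Requires C++17 for std::aligned_alloc and POSIX for sysconf().
#include <cstdio>
#include <cstdlib>
#include <unistd.h>

namespace demo {

void *align_alloc(size_t alignment, size_t size) {
    // std::aligned_alloc needs size to be a multiple of alignment, so round up first.
    size_t size_aligned = size;
    if ((size_aligned % alignment) != 0) {
        size_aligned += (alignment - (size_aligned % alignment));
    }
    return std::aligned_alloc(alignment, size_aligned);
}

// std::free(nullptr) is a no-op, so callers may free unconditionally.
void align_free(void *ptr) { std::free(ptr); }

} // namespace demo

int main() {
    size_t size_page = sysconf(_SC_PAGESIZE);

    // Allocate a page-aligned block for an arbitrary, non page-multiple size,
    // mirroring what ggml_backend_qnn_buffer_context does in its constructor.
    void *buffer = demo::align_alloc(size_page, 100 * 1024 + 3);
    if (!buffer) {
        std::fprintf(stderr, "allocation failed\n");
        return 1;
    }

    // Release it the same way the destructor does.
    demo::align_free(buffer);
    return 0;
}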