diff --git a/ggml/src/ggml-qnn/buffer.hpp b/ggml/src/ggml-qnn/buffer.hpp new file mode 100644 index 000000000..db8e8ccaf --- /dev/null +++ b/ggml/src/ggml-qnn/buffer.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include <memory> + +#include "logger.hpp" +#include "qnn-lib.hpp" + +namespace qnn { +class ggml_qnn_rpc_buffer { +public: + ggml_qnn_rpc_buffer(std::shared_ptr<qnn_instance> qnn_instance, size_t size, uint32_t rank, uint32_t *dimensions, + Qnn_DataType_t data_type) : + _qnn_instance(qnn_instance), _size(size) { + + auto *qnn_rpc_buffer = static_cast<uint8_t *>(qnn_instance->alloc_rpcmem(size, alignof(void *))); + _qnn_rpc_mem_handle = qnn_instance->register_rpcmem(qnn_rpc_buffer, rank, dimensions, data_type); + if (!_qnn_rpc_mem_handle) { + qnn_instance->free_rpcmem(qnn_rpc_buffer); + QNN_LOG_WARN("register rpc mem failure\n"); + return; + } + + _qnn_rpc_buffer = qnn_rpc_buffer; + QNN_LOG_DEBUG("alloc rpcmem(%p) successfully, size %d\n", _qnn_rpc_buffer, (int)size); + } + ~ggml_qnn_rpc_buffer() { + if (_qnn_instance) { + if (_qnn_rpc_mem_handle) { + _qnn_instance->unregister_rpcmem(_qnn_rpc_mem_handle); + } + + if (_qnn_rpc_buffer) { + _qnn_instance->free_rpcmem(_qnn_rpc_buffer); + } + } + } + + bool is_valid() const { return _qnn_rpc_buffer && _qnn_rpc_mem_handle; } + + uint8_t *get_buffer() const { return _qnn_rpc_buffer; } + size_t get_size() const { return _size; } + Qnn_MemHandle_t get_mem_handle() const { return _qnn_rpc_mem_handle; } + +private: + std::shared_ptr<qnn_instance> _qnn_instance; + size_t _size = 0; + uint8_t *_qnn_rpc_buffer = nullptr; + Qnn_MemHandle_t _qnn_rpc_mem_handle = nullptr; + + ggml_qnn_rpc_buffer(const ggml_qnn_rpc_buffer &) = delete; + void operator=(const ggml_qnn_rpc_buffer &) = delete; + ggml_qnn_rpc_buffer(ggml_qnn_rpc_buffer &&) = delete; + void operator=(ggml_qnn_rpc_buffer &&) = delete; +}; + +} // namespace qnn diff --git a/ggml/src/ggml-qnn/qnn-lib.hpp b/ggml/src/ggml-qnn/qnn-lib.hpp index 4e1dcb34c..aa142c74a 100644 --- a/ggml/src/ggml-qnn/qnn-lib.hpp +++ 
b/ggml/src/ggml-qnn/qnn-lib.hpp @@ -638,56 +638,6 @@ public: return mem_fd; } - int register_rpcmem(void *p_data, Qnn_Tensor_t *p_tensor) { - if (nullptr == p_data || (nullptr == p_tensor)) { - QNN_LOG_WARN("invalid param\n"); - return 1; - } - - if (!is_rpcmem_initialized()) { - QNN_LOG_WARN("rpc memory not initialized\n"); - return 2; - } - - if (is_rpcmem_allocated(p_data)) { - QNN_LOG_WARN("rpc memory already allocated\n"); - return 3; - } - - if (is_rpcmem_registered(QNN_TENSOR_GET_MEM_HANDLE(*p_tensor))) { - QNN_LOG_WARN("tensor %s has been registered shared memory\n", QNN_TENSOR_GET_NAME(*p_tensor)); - return 4; - } - - int32_t mem_fd = rpcmem_to_fd(p_data); - if (mem_fd == -1) { - QNN_LOG_WARN("failed to get file descriptor\n"); - return 5; - } - QNN_LOG_INFO("mem_fd %d\n", mem_fd); - Qnn_MemDescriptor_t descriptor = { { QNN_TENSOR_GET_RANK(*p_tensor), QNN_TENSOR_GET_DIMENSIONS(*p_tensor), - nullptr }, - QNN_TENSOR_GET_DATA_TYPE(*p_tensor), - QNN_MEM_TYPE_ION, - { { mem_fd } } }; - Qnn_MemHandle_t handle = nullptr; - int error = QNN_SUCCESS; - error = _qnn_interface->qnn_mem_register(_qnn_context_handle, &descriptor, - /*numDescriptors=*/1, &handle); - if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to register shared memory, error %d, %s\n", QNN_GET_ERROR_CODE(error), - strerror(error)); - return 6; - } - - QNN_TENSOR_SET_MEM_HANDLE(*p_tensor, handle); - _qnn_mem_set.insert((std::pair<void *, Qnn_MemHandle_t>(p_data, handle))); - - QNN_LOG_INFO("tensor %s successfully register shared memory handler: %p\n", QNN_TENSOR_GET_NAME(*p_tensor), - handle); - return 0; - } - void *get_rpcmem_from_memhandle(Qnn_MemHandle_t mem_handle) { for (std::unordered_map<void *, Qnn_MemHandle_t>::iterator it = _qnn_mem_set.begin(); it != _qnn_mem_set.end(); it++) { @@ -700,22 +650,56 @@ public: return nullptr; } - void unregister_rpcmem() { - Qnn_ErrorHandle_t error = QNN_SUCCESS; - - if (_qnn_mem_set.empty()) { - QNN_LOG_WARN("no rpcmem registered\n"); + Qnn_MemHandle_t register_rpcmem(void *p_data, uint32_t rank, 
uint32_t *dimensions, Qnn_DataType_t data_type) { + if (!p_data) { + QNN_LOG_WARN("invalid param\n"); + return nullptr; } - for (std::unordered_map<void *, Qnn_MemHandle_t>::iterator it = _qnn_mem_set.begin(); it != _qnn_mem_set.end(); - it++) { - Qnn_MemHandle_t mem_handle = it->second; - error = _qnn_interface->qnn_mem_de_register(&mem_handle, 1); - if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to unregister shared memory, error %d\n", QNN_GET_ERROR_CODE(error)); - } + if (!is_rpcmem_initialized()) { + QNN_LOG_WARN("rpc memory not initialized\n"); + return nullptr; + } + + if (is_rpcmem_allocated(p_data)) { + QNN_LOG_WARN("rpc memory already allocated\n"); + return nullptr; + } + + auto mem_fd = rpcmem_to_fd(p_data); + if (mem_fd == -1) { + QNN_LOG_WARN("failed to get file descriptor\n"); + return nullptr; + } + + QNN_LOG_INFO("mem_fd %d\n", mem_fd); + Qnn_MemDescriptor_t descriptor = { { rank, dimensions, nullptr }, data_type, QNN_MEM_TYPE_ION, { { mem_fd } } }; + Qnn_MemHandle_t handle = nullptr; + auto error = _qnn_interface->qnn_mem_register(_qnn_context_handle, &descriptor, + /*numDescriptors=*/1, &handle); + if (error != QNN_SUCCESS) { + QNN_LOG_WARN("failed to register shared memory, error %d, %s\n", QNN_GET_ERROR_CODE(error), + strerror(error)); + return nullptr; + } + + _qnn_mem_set.insert((std::pair<void *, Qnn_MemHandle_t>(p_data, handle))); + + QNN_LOG_INFO("successfully register shared memory handler: %p\n", handle); + return handle; + } + + void unregister_rpcmem(Qnn_MemHandle_t mem_handle) { + Qnn_ErrorHandle_t error = _qnn_interface->qnn_mem_de_register(&mem_handle, 1); + if (error != QNN_SUCCESS) { + QNN_LOG_WARN("failed to unregister shared memory, error %d\n", QNN_GET_ERROR_CODE(error)); + } + + auto it = std::find_if(_qnn_mem_set.begin(), _qnn_mem_set.end(), + [mem_handle](const auto &kv) { return kv.second == mem_handle; }); + if (it != _qnn_mem_set.end()) { + _qnn_mem_set.erase(it); } - _qnn_mem_set.clear(); } bool is_rpcmem_allocated(void *buf) { return _qnn_mem_set.count(buf) != 0U; 
} diff --git a/ggml/src/ggml-qnn/tensor.hpp b/ggml/src/ggml-qnn/tensor.hpp index 7709936ed..c4ea7a409 100644 --- a/ggml/src/ggml-qnn/tensor.hpp +++ b/ggml/src/ggml-qnn/tensor.hpp @@ -10,6 +10,7 @@ #include "QnnTensor.h" #include "System/QnnSystemInterface.h" +#include "buffer.hpp" #include "logger.hpp" #include "qnn-lib.hpp" #include "utils.hpp" @@ -28,11 +29,7 @@ public: QNN_LOG_DEBUG("create tensor %s, device: %d", _tensor_name.c_str(), device); } - ~ggml_qnn_tensor() { - if (_qnn_instance && _qnn_rpc_buffer) { - _qnn_instance->free_rpcmem(_qnn_rpc_buffer); - } - } + ~ggml_qnn_tensor() { _qnn_rpc_buffer.reset(); } bool bind_ggml_tensor(ggml_tensor *tensor, bool is_input) { if (_tensor) { @@ -65,13 +62,19 @@ public: if (should_use_mem_handle()) { if (!_qnn_rpc_buffer) { - _qnn_rpc_buffer = alloc_rpc_mem(ggml_nbytes(tensor)); - if (!_qnn_rpc_buffer) { + auto qnn_rpc_buffer = std::make_unique<ggml_qnn_rpc_buffer>( + _qnn_instance, ggml_nbytes(tensor), QNN_TENSOR_GET_RANK(_qnn_tensor), + QNN_TENSOR_GET_DIMENSIONS(_qnn_tensor), QNN_TENSOR_GET_DATA_TYPE(_qnn_tensor)); + if (!qnn_rpc_buffer->is_valid()) { QNN_LOG_WARN("alloc rpc mem failed, tensor %s", _tensor_name.c_str()); return false; } + + _qnn_rpc_buffer = std::move(qnn_rpc_buffer); } + QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_MEMHANDLE); + QNN_TENSOR_SET_MEM_HANDLE(_qnn_tensor, _qnn_rpc_buffer->get_mem_handle()); QNN_LOG_DEBUG("tensor %s, use mem handle %p", _tensor_name.c_str(), QNN_TENSOR_GET_MEM_HANDLE(_qnn_tensor)); } else { QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_RAW); @@ -132,7 +135,7 @@ private: if (should_use_mem_handle()) { if (_qnn_rpc_buffer) { - memcpy(_qnn_rpc_buffer, _tensor->data, ggml_nbytes(_tensor)); + memcpy(_qnn_rpc_buffer->get_buffer(), _tensor->data, ggml_nbytes(_tensor)); } else { QNN_LOG_WARN("tensor %s: can't find rpcmem from qnn mem handle\n", _tensor_name.c_str()); return false; @@ -153,7 +156,7 @@ private: if (should_use_mem_handle()) { if (_qnn_rpc_buffer) { - 
memcpy(_tensor->data, _qnn_rpc_buffer, ggml_nbytes(_tensor)); + memcpy(_tensor->data, _qnn_rpc_buffer->get_buffer(), ggml_nbytes(_tensor)); } else { QNN_LOG_WARN("can't find rpcmem from qnn mem handle\n"); return false; @@ -165,29 +168,6 @@ private: return true; } - uint8_t *alloc_rpc_mem(size_t bytes) { - uint8_t *qnn_rpc_buffer = static_cast<uint8_t *>(_qnn_instance->alloc_rpcmem(bytes, alignof(void *))); - if (!qnn_rpc_buffer) { - QNN_LOG_WARN("alloc rpc mem failure, %s\n", strerror(errno)); - QNN_LOG_DEBUG("tensor name %s", _tensor_name.c_str()); - return nullptr; - } - - QNN_LOG_INFO("tensor %s: alloc rpcmem(%p) successfully\n", _tensor_name.c_str(), qnn_rpc_buffer); - auto error = _qnn_instance->register_rpcmem(qnn_rpc_buffer, &_qnn_tensor); - if (error != QNN_SUCCESS) { - QNN_LOG_WARN("register rpc mem failure, %d\n", (int)error); - QNN_LOG_DEBUG("tensor name %s", _tensor_name.c_str()); - _qnn_instance->free_rpcmem(qnn_rpc_buffer); - return nullptr; - } - - // The mem handle will be set at qnn_instance::register_rpcmem - QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_MEMHANDLE); - QNN_LOG_INFO("tensor %s: register rpcmem(%p) successfully\n", _tensor_name.c_str(), qnn_rpc_buffer); - return qnn_rpc_buffer; - } - void update_params_from_ggml_tensor(ggml_tensor *tensor) { _dimensions[0] = (uint32_t)tensor->ne[0]; _dimensions[1] = (uint32_t)tensor->ne[1]; @@ -211,7 +191,7 @@ private: Qnn_Tensor_t _qnn_tensor = qnn_tensor_init(kDefaultQnnTensorVersion); std::array<uint32_t, GGML_MAX_DIMS> _dimensions = {}; Qnn_GraphHandle_t _graph_handle = nullptr; - uint8_t *_qnn_rpc_buffer = nullptr; + std::unique_ptr<ggml_qnn_rpc_buffer> _qnn_rpc_buffer; ggml_qnn_tensor(const ggml_qnn_tensor &) = delete; void operator=(const ggml_qnn_tensor &) = delete;