refactoring: split qnn rpc buffer into dedicated class

This commit is contained in:
hongruichen 2024-07-26 22:33:30 +08:00
parent f843e5aaf5
commit ee305cc171
3 changed files with 116 additions and 96 deletions

View file

@ -0,0 +1,56 @@
#pragma once

#include <cstddef>
#include <cstdint>
#include <memory>

#include "logger.hpp"
#include "qnn-lib.hpp"

namespace qnn {

// RAII wrapper around a QNN rpc (shared) memory buffer: allocates the buffer and
// registers it with the QNN backend on construction, unregisters and frees it on
// destruction. Construction can fail; callers must check is_valid() before use.
class ggml_qnn_rpc_buffer {
public:
    // size: buffer size in bytes; rank/dimensions/data_type describe the tensor
    // layout passed through to qnn_instance::register_rpcmem.
    ggml_qnn_rpc_buffer(std::shared_ptr<qnn_instance> qnn_instance, size_t size, uint32_t rank, uint32_t *dimensions,
                        Qnn_DataType_t data_type) :
        _qnn_instance(qnn_instance), _size(size) {

        // Allocate the rpc memory first and bail out early on failure, so we never
        // hand a null pointer to register_rpcmem (and log the right failure cause).
        auto *qnn_rpc_buffer = static_cast<uint8_t *>(qnn_instance->alloc_rpcmem(size, alignof(void *)));
        if (!qnn_rpc_buffer) {
            QNN_LOG_WARN("alloc rpc mem failure\n");
            return;
        }

        _qnn_rpc_mem_handle = qnn_instance->register_rpcmem(qnn_rpc_buffer, rank, dimensions, data_type);
        if (!_qnn_rpc_mem_handle) {
            // Registration failed: free the buffer so the object is invalid but leak-free.
            qnn_instance->free_rpcmem(qnn_rpc_buffer);
            QNN_LOG_WARN("register rpc mem failure\n");
            return;
        }

        // Only publish the pointer once both allocation and registration succeeded,
        // so is_valid() and the destructor see a consistent state.
        _qnn_rpc_buffer = qnn_rpc_buffer;
        QNN_LOG_DEBUG("alloc rpcmem(%p) successfully, size %d\n", _qnn_rpc_buffer, (int)size);
    }

    ~ggml_qnn_rpc_buffer() {
        if (_qnn_instance) {
            if (_qnn_rpc_mem_handle) {
                _qnn_instance->unregister_rpcmem(_qnn_rpc_mem_handle);
            }

            if (_qnn_rpc_buffer) {
                _qnn_instance->free_rpcmem(_qnn_rpc_buffer);
            }
        }
    }

    // True only when both the allocation and the backend registration succeeded.
    bool is_valid() const { return _qnn_rpc_buffer && _qnn_rpc_mem_handle; }

    uint8_t *get_buffer() const { return _qnn_rpc_buffer; }
    size_t get_size() const { return _size; }
    Qnn_MemHandle_t get_mem_handle() const { return _qnn_rpc_mem_handle; }

private:
    std::shared_ptr<qnn_instance> _qnn_instance;
    size_t _size = 0;
    uint8_t *_qnn_rpc_buffer = nullptr;
    Qnn_MemHandle_t _qnn_rpc_mem_handle = nullptr;

    // Non-copyable and non-movable: the destructor solely owns the rpc registration.
    ggml_qnn_rpc_buffer(const ggml_qnn_rpc_buffer &) = delete;
    void operator=(const ggml_qnn_rpc_buffer &) = delete;
    ggml_qnn_rpc_buffer(ggml_qnn_rpc_buffer &&) = delete;
    void operator=(ggml_qnn_rpc_buffer &&) = delete;
};

} // namespace qnn

View file

@ -638,56 +638,6 @@ public:
return mem_fd;
}
// Registers a previously rpcmem-allocated block with the QNN backend and stores
// the resulting mem handle on `p_tensor`. Returns 0 on success or a distinct
// non-zero code per failure mode so callers can tell what went wrong:
//   1 = null argument, 2 = rpcmem not initialized, 3 = buffer already registered
//   here, 4 = tensor already carries a mem handle, 5 = fd lookup failed,
//   6 = backend qnn_mem_register failed.
int register_rpcmem(void *p_data, Qnn_Tensor_t *p_tensor) {
if (nullptr == p_data || (nullptr == p_tensor)) {
QNN_LOG_WARN("invalid param\n");
return 1;
}
if (!is_rpcmem_initialized()) {
QNN_LOG_WARN("rpc memory not initialized\n");
return 2;
}
// Reject double registration of the same buffer (tracked in _qnn_mem_set).
if (is_rpcmem_allocated(p_data)) {
QNN_LOG_WARN("rpc memory already allocated\n");
return 3;
}
if (is_rpcmem_registered(QNN_TENSOR_GET_MEM_HANDLE(*p_tensor))) {
QNN_LOG_WARN("tensor %s has been registered shared memory\n", QNN_TENSOR_GET_NAME(*p_tensor));
return 4;
}
// QNN registers shared memory via its ION file descriptor, not the raw pointer.
int32_t mem_fd = rpcmem_to_fd(p_data);
if (mem_fd == -1) {
QNN_LOG_WARN("failed to get file descriptor\n");
return 5;
}
QNN_LOG_INFO("mem_fd %d\n", mem_fd);
// Describe the buffer to the backend using the tensor's own rank/dims/dtype.
Qnn_MemDescriptor_t descriptor = { { QNN_TENSOR_GET_RANK(*p_tensor), QNN_TENSOR_GET_DIMENSIONS(*p_tensor),
nullptr },
QNN_TENSOR_GET_DATA_TYPE(*p_tensor),
QNN_MEM_TYPE_ION,
{ { mem_fd } } };
Qnn_MemHandle_t handle = nullptr;
int error = QNN_SUCCESS;
error = _qnn_interface->qnn_mem_register(_qnn_context_handle, &descriptor,
/*numDescriptors=*/1, &handle);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to register shared memory, error %d, %s\n", QNN_GET_ERROR_CODE(error),
strerror(error));
return 6;
}
// Attach the handle to the tensor and remember the mapping for later cleanup.
QNN_TENSOR_SET_MEM_HANDLE(*p_tensor, handle);
_qnn_mem_set.insert((std::pair<void *, Qnn_MemHandle_t>(p_data, handle)));
QNN_LOG_INFO("tensor %s successfully register shared memory handler: %p\n", QNN_TENSOR_GET_NAME(*p_tensor),
handle);
return 0;
}
void *get_rpcmem_from_memhandle(Qnn_MemHandle_t mem_handle) {
for (std::unordered_map<void *, Qnn_MemHandle_t>::iterator it = _qnn_mem_set.begin(); it != _qnn_mem_set.end();
it++) {
@ -700,22 +650,56 @@ public:
return nullptr;
}
void unregister_rpcmem() {
Qnn_ErrorHandle_t error = QNN_SUCCESS;
if (_qnn_mem_set.empty()) {
QNN_LOG_WARN("no rpcmem registered\n");
Qnn_MemHandle_t register_rpcmem(void *p_data, uint32_t rank, uint32_t *dimensions, Qnn_DataType_t data_type) {
if (!p_data) {
QNN_LOG_WARN("invalid param\n");
return nullptr;
}
for (std::unordered_map<void *, Qnn_MemHandle_t>::iterator it = _qnn_mem_set.begin(); it != _qnn_mem_set.end();
it++) {
Qnn_MemHandle_t mem_handle = it->second;
error = _qnn_interface->qnn_mem_de_register(&mem_handle, 1);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to unregister shared memory, error %d\n", QNN_GET_ERROR_CODE(error));
}
if (!is_rpcmem_initialized()) {
QNN_LOG_WARN("rpc memory not initialized\n");
return nullptr;
}
if (is_rpcmem_allocated(p_data)) {
QNN_LOG_WARN("rpc memory already allocated\n");
return nullptr;
}
auto mem_fd = rpcmem_to_fd(p_data);
if (mem_fd == -1) {
QNN_LOG_WARN("failed to get file descriptor\n");
return nullptr;
}
QNN_LOG_INFO("mem_fd %d\n", mem_fd);
Qnn_MemDescriptor_t descriptor = { { rank, dimensions, nullptr }, data_type, QNN_MEM_TYPE_ION, { { mem_fd } } };
Qnn_MemHandle_t handle = nullptr;
auto error = _qnn_interface->qnn_mem_register(_qnn_context_handle, &descriptor,
/*numDescriptors=*/1, &handle);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to register shared memory, error %d, %s\n", QNN_GET_ERROR_CODE(error),
strerror(error));
return nullptr;
}
_qnn_mem_set.insert((std::pair<void *, Qnn_MemHandle_t>(p_data, handle)));
QNN_LOG_INFO("successfully register shared memory handler: %p\n", handle);
return handle;
}
// Unregisters a single shared-memory handle from the QNN backend and removes
// its bookkeeping entry from _qnn_mem_set. Other registered buffers are left
// untouched (the old blanket `_qnn_mem_set.clear()` would have dropped every
// registration after releasing just one handle).
void unregister_rpcmem(Qnn_MemHandle_t mem_handle) {
// Deregister from the backend first; on failure we only log and still drop
// the local entry, since the handle is no longer usable by the caller.
Qnn_ErrorHandle_t error = _qnn_interface->qnn_mem_de_register(&mem_handle, 1);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to unregister shared memory, error %d\n", QNN_GET_ERROR_CODE(error));
}
// _qnn_mem_set maps buffer -> handle, so find the entry by value.
auto it = std::find_if(_qnn_mem_set.begin(), _qnn_mem_set.end(),
[mem_handle](const auto &kv) { return kv.second == mem_handle; });
if (it != _qnn_mem_set.end()) {
_qnn_mem_set.erase(it);
}
}
// True when `buf` is currently tracked as a registered rpc buffer.
bool is_rpcmem_allocated(void *buf) { return _qnn_mem_set.find(buf) != _qnn_mem_set.end(); }

View file

@ -10,6 +10,7 @@
#include "QnnTensor.h"
#include "System/QnnSystemInterface.h"
#include "buffer.hpp"
#include "logger.hpp"
#include "qnn-lib.hpp"
#include "utils.hpp"
@ -28,11 +29,7 @@ public:
QNN_LOG_DEBUG("create tensor %s, device: %d", _tensor_name.c_str(), device);
}
~ggml_qnn_tensor() {
if (_qnn_instance && _qnn_rpc_buffer) {
_qnn_instance->free_rpcmem(_qnn_rpc_buffer);
}
}
// Releasing the rpc buffer wrapper unregisters and frees the rpc memory (RAII).
~ggml_qnn_tensor() { _qnn_rpc_buffer.reset(); }
bool bind_ggml_tensor(ggml_tensor *tensor, bool is_input) {
if (_tensor) {
@ -65,13 +62,19 @@ public:
if (should_use_mem_handle()) {
if (!_qnn_rpc_buffer) {
_qnn_rpc_buffer = alloc_rpc_mem(ggml_nbytes(tensor));
if (!_qnn_rpc_buffer) {
auto qnn_rpc_buffer = std::make_unique<ggml_qnn_rpc_buffer>(
_qnn_instance, ggml_nbytes(tensor), QNN_TENSOR_GET_RANK(_qnn_tensor),
QNN_TENSOR_GET_DIMENSIONS(_qnn_tensor), QNN_TENSOR_GET_DATA_TYPE(_qnn_tensor));
if (!qnn_rpc_buffer->is_valid()) {
QNN_LOG_WARN("alloc rpc mem failed, tensor %s", _tensor_name.c_str());
return false;
}
_qnn_rpc_buffer = std::move(qnn_rpc_buffer);
}
QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_MEMHANDLE);
QNN_TENSOR_SET_MEM_HANDLE(_qnn_tensor, _qnn_rpc_buffer->get_mem_handle());
QNN_LOG_DEBUG("tensor %s, use mem handle %p", _tensor_name.c_str(), QNN_TENSOR_GET_MEM_HANDLE(_qnn_tensor));
} else {
QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_RAW);
@ -132,7 +135,7 @@ private:
if (should_use_mem_handle()) {
if (_qnn_rpc_buffer) {
memcpy(_qnn_rpc_buffer, _tensor->data, ggml_nbytes(_tensor));
memcpy(_qnn_rpc_buffer->get_buffer(), _tensor->data, ggml_nbytes(_tensor));
} else {
QNN_LOG_WARN("tensor %s: can't find rpcmem from qnn mem handle\n", _tensor_name.c_str());
return false;
@ -153,7 +156,7 @@ private:
if (should_use_mem_handle()) {
if (_qnn_rpc_buffer) {
memcpy(_tensor->data, _qnn_rpc_buffer, ggml_nbytes(_tensor));
memcpy(_tensor->data, _qnn_rpc_buffer->get_buffer(), ggml_nbytes(_tensor));
} else {
QNN_LOG_WARN("can't find rpcmem from qnn mem handle\n");
return false;
@ -165,29 +168,6 @@ private:
return true;
}
// Allocates `bytes` of rpc (shared) memory and registers it against _qnn_tensor.
// Returns the buffer pointer on success, or nullptr (with the allocation freed)
// on any failure. On success the tensor's mem type is switched to MEMHANDLE.
uint8_t *alloc_rpc_mem(size_t bytes) {
uint8_t *qnn_rpc_buffer = static_cast<uint8_t *>(_qnn_instance->alloc_rpcmem(bytes, alignof(void *)));
if (!qnn_rpc_buffer) {
QNN_LOG_WARN("alloc rpc mem failure, %s\n", strerror(errno));
QNN_LOG_DEBUG("tensor name %s", _tensor_name.c_str());
return nullptr;
}
QNN_LOG_INFO("tensor %s: alloc rpcmem(%p) successfully\n", _tensor_name.c_str(), qnn_rpc_buffer);
auto error = _qnn_instance->register_rpcmem(qnn_rpc_buffer, &_qnn_tensor);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("register rpc mem failure, %d\n", (int)error);
QNN_LOG_DEBUG("tensor name %s", _tensor_name.c_str());
// Don't leak the allocation when registration fails.
_qnn_instance->free_rpcmem(qnn_rpc_buffer);
return nullptr;
}
// The mem handle will be set at qnn_instance::register_rpcmem
QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_MEMHANDLE);
QNN_LOG_INFO("tensor %s: register rpcmem(%p) successfully\n", _tensor_name.c_str(), qnn_rpc_buffer);
return qnn_rpc_buffer;
}
void update_params_from_ggml_tensor(ggml_tensor *tensor) {
_dimensions[0] = (uint32_t)tensor->ne[0];
_dimensions[1] = (uint32_t)tensor->ne[1];
@ -211,7 +191,7 @@ private:
Qnn_Tensor_t _qnn_tensor = qnn_tensor_init(kDefaultQnnTensorVersion);
std::array<uint32_t, GGML_MAX_DIMS> _dimensions = {};
Qnn_GraphHandle_t _graph_handle = nullptr;
uint8_t *_qnn_rpc_buffer = nullptr;
std::unique_ptr<ggml_qnn_rpc_buffer> _qnn_rpc_buffer;
ggml_qnn_tensor(const ggml_qnn_tensor &) = delete;
void operator=(const ggml_qnn_tensor &) = delete;