refactoring: split qnn rpc buffer into dedicated class
This commit is contained in:
parent
f843e5aaf5
commit
ee305cc171
3 changed files with 116 additions and 96 deletions
56
ggml/src/ggml-qnn/buffer.hpp
Normal file
56
ggml/src/ggml-qnn/buffer.hpp
Normal file
|
@ -0,0 +1,56 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "logger.hpp"
|
||||
#include "qnn-lib.hpp"
|
||||
|
||||
namespace qnn {
|
||||
class ggml_qnn_rpc_buffer {
|
||||
public:
|
||||
ggml_qnn_rpc_buffer(std::shared_ptr<qnn_instance> qnn_instance, size_t size, uint32_t rank, uint32_t *dimensions,
|
||||
Qnn_DataType_t data_type) :
|
||||
_qnn_instance(qnn_instance), _size(size) {
|
||||
|
||||
auto *qnn_rpc_buffer = static_cast<uint8_t *>(qnn_instance->alloc_rpcmem(size, alignof(void *)));
|
||||
_qnn_rpc_mem_handle = qnn_instance->register_rpcmem(qnn_rpc_buffer, rank, dimensions, data_type);
|
||||
if (!_qnn_rpc_mem_handle) {
|
||||
qnn_instance->free_rpcmem(qnn_rpc_buffer);
|
||||
QNN_LOG_WARN("register rpc mem failure\n");
|
||||
return;
|
||||
}
|
||||
|
||||
_qnn_rpc_buffer = qnn_rpc_buffer;
|
||||
QNN_LOG_DEBUG("alloc rpcmem(%p) successfully, size %d\n", _qnn_rpc_buffer, (int)size);
|
||||
}
|
||||
~ggml_qnn_rpc_buffer() {
|
||||
if (_qnn_instance) {
|
||||
if (_qnn_rpc_mem_handle) {
|
||||
_qnn_instance->unregister_rpcmem(_qnn_rpc_mem_handle);
|
||||
}
|
||||
|
||||
if (_qnn_rpc_buffer) {
|
||||
_qnn_instance->free_rpcmem(_qnn_rpc_buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool is_valid() const { return _qnn_rpc_buffer && _qnn_rpc_mem_handle; }
|
||||
|
||||
uint8_t *get_buffer() const { return _qnn_rpc_buffer; }
|
||||
size_t get_size() const { return _size; }
|
||||
Qnn_MemHandle_t get_mem_handle() const { return _qnn_rpc_mem_handle; }
|
||||
|
||||
private:
|
||||
std::shared_ptr<qnn_instance> _qnn_instance;
|
||||
size_t _size = 0;
|
||||
uint8_t *_qnn_rpc_buffer = nullptr;
|
||||
Qnn_MemHandle_t _qnn_rpc_mem_handle = nullptr;
|
||||
|
||||
ggml_qnn_rpc_buffer(const ggml_qnn_rpc_buffer &) = delete;
|
||||
void operator=(const ggml_qnn_rpc_buffer &) = delete;
|
||||
ggml_qnn_rpc_buffer(ggml_qnn_rpc_buffer &&) = delete;
|
||||
void operator=(ggml_qnn_rpc_buffer &&) = delete;
|
||||
};
|
||||
|
||||
} // namespace qnn
|
|
@ -638,56 +638,6 @@ public:
|
|||
return mem_fd;
|
||||
}
|
||||
|
||||
int register_rpcmem(void *p_data, Qnn_Tensor_t *p_tensor) {
|
||||
if (nullptr == p_data || (nullptr == p_tensor)) {
|
||||
QNN_LOG_WARN("invalid param\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!is_rpcmem_initialized()) {
|
||||
QNN_LOG_WARN("rpc memory not initialized\n");
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (is_rpcmem_allocated(p_data)) {
|
||||
QNN_LOG_WARN("rpc memory already allocated\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
if (is_rpcmem_registered(QNN_TENSOR_GET_MEM_HANDLE(*p_tensor))) {
|
||||
QNN_LOG_WARN("tensor %s has been registered shared memory\n", QNN_TENSOR_GET_NAME(*p_tensor));
|
||||
return 4;
|
||||
}
|
||||
|
||||
int32_t mem_fd = rpcmem_to_fd(p_data);
|
||||
if (mem_fd == -1) {
|
||||
QNN_LOG_WARN("failed to get file descriptor\n");
|
||||
return 5;
|
||||
}
|
||||
QNN_LOG_INFO("mem_fd %d\n", mem_fd);
|
||||
Qnn_MemDescriptor_t descriptor = { { QNN_TENSOR_GET_RANK(*p_tensor), QNN_TENSOR_GET_DIMENSIONS(*p_tensor),
|
||||
nullptr },
|
||||
QNN_TENSOR_GET_DATA_TYPE(*p_tensor),
|
||||
QNN_MEM_TYPE_ION,
|
||||
{ { mem_fd } } };
|
||||
Qnn_MemHandle_t handle = nullptr;
|
||||
int error = QNN_SUCCESS;
|
||||
error = _qnn_interface->qnn_mem_register(_qnn_context_handle, &descriptor,
|
||||
/*numDescriptors=*/1, &handle);
|
||||
if (error != QNN_SUCCESS) {
|
||||
QNN_LOG_WARN("failed to register shared memory, error %d, %s\n", QNN_GET_ERROR_CODE(error),
|
||||
strerror(error));
|
||||
return 6;
|
||||
}
|
||||
|
||||
QNN_TENSOR_SET_MEM_HANDLE(*p_tensor, handle);
|
||||
_qnn_mem_set.insert((std::pair<void *, Qnn_MemHandle_t>(p_data, handle)));
|
||||
|
||||
QNN_LOG_INFO("tensor %s successfully register shared memory handler: %p\n", QNN_TENSOR_GET_NAME(*p_tensor),
|
||||
handle);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *get_rpcmem_from_memhandle(Qnn_MemHandle_t mem_handle) {
|
||||
for (std::unordered_map<void *, Qnn_MemHandle_t>::iterator it = _qnn_mem_set.begin(); it != _qnn_mem_set.end();
|
||||
it++) {
|
||||
|
@ -700,22 +650,56 @@ public:
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
void unregister_rpcmem() {
|
||||
Qnn_ErrorHandle_t error = QNN_SUCCESS;
|
||||
|
||||
if (_qnn_mem_set.empty()) {
|
||||
QNN_LOG_WARN("no rpcmem registered\n");
|
||||
Qnn_MemHandle_t register_rpcmem(void *p_data, uint32_t rank, uint32_t *dimensions, Qnn_DataType_t data_type) {
|
||||
if (!p_data) {
|
||||
QNN_LOG_WARN("invalid param\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
for (std::unordered_map<void *, Qnn_MemHandle_t>::iterator it = _qnn_mem_set.begin(); it != _qnn_mem_set.end();
|
||||
it++) {
|
||||
Qnn_MemHandle_t mem_handle = it->second;
|
||||
error = _qnn_interface->qnn_mem_de_register(&mem_handle, 1);
|
||||
if (!is_rpcmem_initialized()) {
|
||||
QNN_LOG_WARN("rpc memory not initialized\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (is_rpcmem_allocated(p_data)) {
|
||||
QNN_LOG_WARN("rpc memory already allocated\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto mem_fd = rpcmem_to_fd(p_data);
|
||||
if (mem_fd == -1) {
|
||||
QNN_LOG_WARN("failed to get file descriptor\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
QNN_LOG_INFO("mem_fd %d\n", mem_fd);
|
||||
Qnn_MemDescriptor_t descriptor = { { rank, dimensions, nullptr }, data_type, QNN_MEM_TYPE_ION, { { mem_fd } } };
|
||||
Qnn_MemHandle_t handle = nullptr;
|
||||
auto error = _qnn_interface->qnn_mem_register(_qnn_context_handle, &descriptor,
|
||||
/*numDescriptors=*/1, &handle);
|
||||
if (error != QNN_SUCCESS) {
|
||||
QNN_LOG_WARN("failed to register shared memory, error %d, %s\n", QNN_GET_ERROR_CODE(error),
|
||||
strerror(error));
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
_qnn_mem_set.insert((std::pair<void *, Qnn_MemHandle_t>(p_data, handle)));
|
||||
|
||||
QNN_LOG_INFO("successfully register shared memory handler: %p\n", handle);
|
||||
return handle;
|
||||
}
|
||||
|
||||
/**
 * Deregisters a single shared-memory handle and removes its entry from _qnn_mem_set.
 *
 * A deregistration error is logged as a warning; the bookkeeping entry is removed regardless.
 *
 * @param mem_handle  handle previously returned by register_rpcmem.
 */
void unregister_rpcmem(Qnn_MemHandle_t mem_handle) {
    Qnn_ErrorHandle_t error = _qnn_interface->qnn_mem_de_register(&mem_handle, 1);
    if (error != QNN_SUCCESS) {
        QNN_LOG_WARN("failed to unregister shared memory, error %d\n", QNN_GET_ERROR_CODE(error));
    }

    // _qnn_mem_set is keyed by buffer pointer, so the entry has to be located by its mapped handle.
    auto it = std::find_if(_qnn_mem_set.begin(), _qnn_mem_set.end(),
                           [mem_handle](const auto &kv) { return kv.second == mem_handle; });
    if (it != _qnn_mem_set.end()) {
        _qnn_mem_set.erase(it);
    }
    // Entries of other buffers that are still registered must stay in the map, so the set is
    // deliberately not cleared here.
}
|
||||
|
||||
// Reports whether `buf` is currently a key of _qnn_mem_set.
bool is_rpcmem_allocated(void *buf) { return _qnn_mem_set.find(buf) != _qnn_mem_set.end(); }
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#include "QnnTensor.h"
|
||||
#include "System/QnnSystemInterface.h"
|
||||
#include "buffer.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "qnn-lib.hpp"
|
||||
#include "utils.hpp"
|
||||
|
@ -28,11 +29,7 @@ public:
|
|||
QNN_LOG_DEBUG("create tensor %s, device: %d", _tensor_name.c_str(), device);
|
||||
}
|
||||
|
||||
~ggml_qnn_tensor() {
|
||||
if (_qnn_instance && _qnn_rpc_buffer) {
|
||||
_qnn_instance->free_rpcmem(_qnn_rpc_buffer);
|
||||
}
|
||||
}
|
||||
// Destroys the owned ggml_qnn_rpc_buffer (if any); that object's destructor unregisters and frees the RPC memory.
~ggml_qnn_tensor() { _qnn_rpc_buffer.reset(); }
|
||||
|
||||
bool bind_ggml_tensor(ggml_tensor *tensor, bool is_input) {
|
||||
if (_tensor) {
|
||||
|
@ -65,13 +62,19 @@ public:
|
|||
|
||||
if (should_use_mem_handle()) {
|
||||
if (!_qnn_rpc_buffer) {
|
||||
_qnn_rpc_buffer = alloc_rpc_mem(ggml_nbytes(tensor));
|
||||
if (!_qnn_rpc_buffer) {
|
||||
auto qnn_rpc_buffer = std::make_unique<ggml_qnn_rpc_buffer>(
|
||||
_qnn_instance, ggml_nbytes(tensor), QNN_TENSOR_GET_RANK(_qnn_tensor),
|
||||
QNN_TENSOR_GET_DIMENSIONS(_qnn_tensor), QNN_TENSOR_GET_DATA_TYPE(_qnn_tensor));
|
||||
if (!qnn_rpc_buffer->is_valid()) {
|
||||
QNN_LOG_WARN("alloc rpc mem failed, tensor %s", _tensor_name.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
_qnn_rpc_buffer = std::move(qnn_rpc_buffer);
|
||||
}
|
||||
|
||||
QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_MEMHANDLE);
|
||||
QNN_TENSOR_SET_MEM_HANDLE(_qnn_tensor, _qnn_rpc_buffer->get_mem_handle());
|
||||
QNN_LOG_DEBUG("tensor %s, use mem handle %p", _tensor_name.c_str(), QNN_TENSOR_GET_MEM_HANDLE(_qnn_tensor));
|
||||
} else {
|
||||
QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_RAW);
|
||||
|
@ -132,7 +135,7 @@ private:
|
|||
|
||||
if (should_use_mem_handle()) {
|
||||
if (_qnn_rpc_buffer) {
|
||||
memcpy(_qnn_rpc_buffer, _tensor->data, ggml_nbytes(_tensor));
|
||||
memcpy(_qnn_rpc_buffer->get_buffer(), _tensor->data, ggml_nbytes(_tensor));
|
||||
} else {
|
||||
QNN_LOG_WARN("tensor %s: can't find rpcmem from qnn mem handle\n", _tensor_name.c_str());
|
||||
return false;
|
||||
|
@ -153,7 +156,7 @@ private:
|
|||
|
||||
if (should_use_mem_handle()) {
|
||||
if (_qnn_rpc_buffer) {
|
||||
memcpy(_tensor->data, _qnn_rpc_buffer, ggml_nbytes(_tensor));
|
||||
memcpy(_tensor->data, _qnn_rpc_buffer->get_buffer(), ggml_nbytes(_tensor));
|
||||
} else {
|
||||
QNN_LOG_WARN("can't find rpcmem from qnn mem handle\n");
|
||||
return false;
|
||||
|
@ -165,29 +168,6 @@ private:
|
|||
return true;
|
||||
}
|
||||
|
||||
uint8_t *alloc_rpc_mem(size_t bytes) {
|
||||
uint8_t *qnn_rpc_buffer = static_cast<uint8_t *>(_qnn_instance->alloc_rpcmem(bytes, alignof(void *)));
|
||||
if (!qnn_rpc_buffer) {
|
||||
QNN_LOG_WARN("alloc rpc mem failure, %s\n", strerror(errno));
|
||||
QNN_LOG_DEBUG("tensor name %s", _tensor_name.c_str());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
QNN_LOG_INFO("tensor %s: alloc rpcmem(%p) successfully\n", _tensor_name.c_str(), qnn_rpc_buffer);
|
||||
auto error = _qnn_instance->register_rpcmem(qnn_rpc_buffer, &_qnn_tensor);
|
||||
if (error != QNN_SUCCESS) {
|
||||
QNN_LOG_WARN("register rpc mem failure, %d\n", (int)error);
|
||||
QNN_LOG_DEBUG("tensor name %s", _tensor_name.c_str());
|
||||
_qnn_instance->free_rpcmem(qnn_rpc_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// The mem handle will be set at qnn_instance::register_rpcmem
|
||||
QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_MEMHANDLE);
|
||||
QNN_LOG_INFO("tensor %s: register rpcmem(%p) successfully\n", _tensor_name.c_str(), qnn_rpc_buffer);
|
||||
return qnn_rpc_buffer;
|
||||
}
|
||||
|
||||
void update_params_from_ggml_tensor(ggml_tensor *tensor) {
|
||||
_dimensions[0] = (uint32_t)tensor->ne[0];
|
||||
_dimensions[1] = (uint32_t)tensor->ne[1];
|
||||
|
@ -211,7 +191,7 @@ private:
|
|||
Qnn_Tensor_t _qnn_tensor = qnn_tensor_init(kDefaultQnnTensorVersion);
|
||||
std::array<uint32_t, GGML_MAX_DIMS> _dimensions = {};
|
||||
Qnn_GraphHandle_t _graph_handle = nullptr;
|
||||
uint8_t *_qnn_rpc_buffer = nullptr;
|
||||
std::unique_ptr<ggml_qnn_rpc_buffer> _qnn_rpc_buffer;
|
||||
|
||||
ggml_qnn_tensor(const ggml_qnn_tensor &) = delete;
|
||||
void operator=(const ggml_qnn_tensor &) = delete;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue