fix: 1. free up rpc memory at destruction

2. unbind tensor
This commit is contained in:
hongruichen 2024-07-22 23:41:23 +08:00
parent 706793f078
commit f843e5aaf5
2 changed files with 28 additions and 12 deletions

View file

@ -86,7 +86,7 @@ public:
return false;
}
QNN_LOG_DEBUG("graph name %s, add_nodes start", _graph_name.c_str());
QNN_LOG_DEBUG("graph name %s, build_graph start", _graph_name.c_str());
_qnn_tensor_inputs.resize(tensor_inputs.size());
_tensor_inputs.resize(tensor_inputs.size());
for (size_t i = 0; i < tensor_inputs.size(); i++) {
@ -111,7 +111,7 @@ public:
snprintf(buffer, GGML_MAX_NAME, "dst%d", (int)i);
auto qnn_tensor =
std::make_shared<ggml_qnn_tensor>(std::string(buffer), _device, _graph_handle, _qnn_instance);
auto *ggml_tensor = tensor_inputs[i];
auto *ggml_tensor = tensor_outputs[i];
if (!qnn_tensor->bind_ggml_tensor(ggml_tensor, false)) {
QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor));
return false;
@ -155,7 +155,7 @@ public:
return false;
}
QNN_LOG_DEBUG("graph name %s, add_nodes succeed", _graph_name.c_str());
QNN_LOG_DEBUG("graph name %s, build_graph succeed", _graph_name.c_str());
return true;
}
@ -173,7 +173,7 @@ public:
}
for (size_t i = 0; i < tensor_outputs.size(); i++) {
auto *ggml_tensor = tensor_inputs[i];
auto *ggml_tensor = tensor_outputs[i];
if (!_tensor_outputs[i]->bind_ggml_tensor(ggml_tensor, false)) {
QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor));
return false;
@ -191,6 +191,14 @@ public:
}
}
for (auto tensor : _tensor_inputs) {
tensor->unbind_ggml_tensor();
}
for (auto tensor : _tensor_outputs) {
tensor->unbind_ggml_tensor();
}
if (error != QNN_SUCCESS) {
QNN_LOG_INFO("error = %d\n", error);
return false;

View file

@ -28,18 +28,23 @@ public:
QNN_LOG_DEBUG("create tensor %s, device: %d", _tensor_name.c_str(), device);
}
~ggml_qnn_tensor() {
if (_qnn_instance && _qnn_rpc_buffer) {
_qnn_instance->free_rpcmem(_qnn_rpc_buffer);
}
}
bool bind_ggml_tensor(ggml_tensor *tensor, bool is_input) {
if (_tensor) {
if (_tensor != tensor) {
QNN_LOG_WARN("tensor %s has been bound to another ggml tensor %s", _tensor_name.c_str(),
ggml_get_name(_tensor));
return false;
} else {
}
QNN_LOG_INFO("tensor %s already bound to same ggml tensor %s", _tensor_name.c_str(),
ggml_get_name(_tensor));
return true;
}
}
update_params_from_ggml_tensor(tensor);
Qnn_TensorType_t new_tensor_type = is_input ? QNN_TENSOR_TYPE_APP_WRITE : QNN_TENSOR_TYPE_APP_READ;
@ -55,14 +60,17 @@ public:
return false;
}
QNN_TENSOR_SET_ID(_qnn_tensor, QNN_TENSOR_GET_ID(qnn_tensor));
QNN_LOG_DEBUG("create graph tensor %s, id: %d", _tensor_name.c_str(), QNN_TENSOR_GET_ID(qnn_tensor));
}
if (should_use_mem_handle()) {
if (!_qnn_rpc_buffer) {
_qnn_rpc_buffer = alloc_rpc_mem(ggml_nbytes(tensor));
if (!_qnn_rpc_buffer) {
QNN_LOG_WARN("alloc rpc mem failed, tensor %s", _tensor_name.c_str());
return false;
}
}
QNN_LOG_DEBUG("tensor %s, use mem handle %p", _tensor_name.c_str(), QNN_TENSOR_GET_MEM_HANDLE(_qnn_tensor));
} else {
@ -107,8 +115,8 @@ public:
QNN_LOG_DEBUG("tensor %s, clear client buffer", _tensor_name.c_str());
}
QNN_LOG_DEBUG("unbind tensor: %s from ggml tensor: %s", _tensor_name.c_str(), ggml_get_name(_tensor));
_tensor = nullptr;
QNN_LOG_DEBUG("unbind tensor: %s from ggml tensor: %s", _tensor_name.c_str(), _tensor->name);
return true;
}