diff --git a/ggml/src/ggml-qnn/graph.hpp b/ggml/src/ggml-qnn/graph.hpp index 9941365f7..c82b7d66a 100644 --- a/ggml/src/ggml-qnn/graph.hpp +++ b/ggml/src/ggml-qnn/graph.hpp @@ -86,7 +86,7 @@ public: return false; } - QNN_LOG_DEBUG("graph name %s, add_nodes start", _graph_name.c_str()); + QNN_LOG_DEBUG("graph name %s, build_graph start", _graph_name.c_str()); _qnn_tensor_inputs.resize(tensor_inputs.size()); _tensor_inputs.resize(tensor_inputs.size()); for (size_t i = 0; i < tensor_inputs.size(); i++) { @@ -111,7 +111,7 @@ public: snprintf(buffer, GGML_MAX_NAME, "dst%d", (int)i); auto qnn_tensor = std::make_shared(std::string(buffer), _device, _graph_handle, _qnn_instance); - auto *ggml_tensor = tensor_inputs[i]; + auto *ggml_tensor = tensor_outputs[i]; if (!qnn_tensor->bind_ggml_tensor(ggml_tensor, false)) { QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor)); return false; @@ -155,7 +155,7 @@ public: return false; } - QNN_LOG_DEBUG("graph name %s, add_nodes succeed", _graph_name.c_str()); + QNN_LOG_DEBUG("graph name %s, build_graph succeed", _graph_name.c_str()); return true; } @@ -173,7 +173,7 @@ public: } for (size_t i = 0; i < tensor_outputs.size(); i++) { - auto *ggml_tensor = tensor_inputs[i]; + auto *ggml_tensor = tensor_outputs[i]; if (!_tensor_outputs[i]->bind_ggml_tensor(ggml_tensor, false)) { QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor)); return false; @@ -191,6 +191,14 @@ public: } } + for (auto tensor : _tensor_inputs) { + tensor->unbind_ggml_tensor(); + } + + for (auto tensor : _tensor_outputs) { + tensor->unbind_ggml_tensor(); + } + if (error != QNN_SUCCESS) { QNN_LOG_INFO("error = %d\n", error); return false; diff --git a/ggml/src/ggml-qnn/tensor.hpp b/ggml/src/ggml-qnn/tensor.hpp index 5e45266b4..7709936ed 100644 --- a/ggml/src/ggml-qnn/tensor.hpp +++ b/ggml/src/ggml-qnn/tensor.hpp @@ -28,17 +28,22 @@ public: QNN_LOG_DEBUG("create tensor %s, device: %d", _tensor_name.c_str(), device); } + ~ggml_qnn_tensor() { + if (_qnn_instance && _qnn_rpc_buffer) { + _qnn_instance->free_rpcmem(_qnn_rpc_buffer); + } + } + bool bind_ggml_tensor(ggml_tensor *tensor, bool is_input) { if (_tensor) { if (_tensor != tensor) { QNN_LOG_WARN("tensor %s has been bound to another ggml tensor %s", _tensor_name.c_str(), ggml_get_name(_tensor)); return false; - } else { - QNN_LOG_INFO("tensor %s already bound to same ggml tensor %s", _tensor_name.c_str(), - ggml_get_name(_tensor)); - return true; } + QNN_LOG_INFO("tensor %s already bound to same ggml tensor %s", _tensor_name.c_str(), + ggml_get_name(_tensor)); + return true; } update_params_from_ggml_tensor(tensor); @@ -55,13 +60,16 @@ public: return false; } QNN_TENSOR_SET_ID(_qnn_tensor, QNN_TENSOR_GET_ID(qnn_tensor)); + QNN_LOG_DEBUG("create graph tensor %s, id: %d", _tensor_name.c_str(), QNN_TENSOR_GET_ID(qnn_tensor)); } if (should_use_mem_handle()) { - _qnn_rpc_buffer = alloc_rpc_mem(ggml_nbytes(tensor)); if (!_qnn_rpc_buffer) { - QNN_LOG_WARN("alloc rpc mem failed, tensor %s", _tensor_name.c_str()); - return false; + _qnn_rpc_buffer = alloc_rpc_mem(ggml_nbytes(tensor)); + if (!_qnn_rpc_buffer) { + QNN_LOG_WARN("alloc rpc mem failed, tensor %s", _tensor_name.c_str()); + return false; + } } QNN_LOG_DEBUG("tensor %s, use mem handle %p", _tensor_name.c_str(), QNN_TENSOR_GET_MEM_HANDLE(_qnn_tensor)); @@ -107,8 +115,8 @@ public: QNN_LOG_DEBUG("tensor %s, clear client buffer", _tensor_name.c_str()); } + QNN_LOG_DEBUG("unbind tensor: %s from ggml tensor: %s", _tensor_name.c_str(), ggml_get_name(_tensor)); _tensor = nullptr; - QNN_LOG_DEBUG("unbind tensor: %s from ggml tensor: %s", _tensor_name.c_str(), _tensor->name); return true; }