fix: 1. free up rpc memory at destruction

2. unbind tensor
This commit is contained in:
hongruichen 2024-07-22 23:41:23 +08:00
parent 706793f078
commit f843e5aaf5
2 changed files with 28 additions and 12 deletions

View file

@ -86,7 +86,7 @@ public:
return false;
}
QNN_LOG_DEBUG("graph name %s, add_nodes start", _graph_name.c_str());
QNN_LOG_DEBUG("graph name %s, build_graph start", _graph_name.c_str());
_qnn_tensor_inputs.resize(tensor_inputs.size());
_tensor_inputs.resize(tensor_inputs.size());
for (size_t i = 0; i < tensor_inputs.size(); i++) {
@ -111,7 +111,7 @@ public:
snprintf(buffer, GGML_MAX_NAME, "dst%d", (int)i);
auto qnn_tensor =
std::make_shared<ggml_qnn_tensor>(std::string(buffer), _device, _graph_handle, _qnn_instance);
auto *ggml_tensor = tensor_inputs[i];
auto *ggml_tensor = tensor_outputs[i];
if (!qnn_tensor->bind_ggml_tensor(ggml_tensor, false)) {
QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor));
return false;
@ -155,7 +155,7 @@ public:
return false;
}
QNN_LOG_DEBUG("graph name %s, add_nodes succeed", _graph_name.c_str());
QNN_LOG_DEBUG("graph name %s, build_graph succeed", _graph_name.c_str());
return true;
}
@ -173,7 +173,7 @@ public:
}
for (size_t i = 0; i < tensor_outputs.size(); i++) {
auto *ggml_tensor = tensor_inputs[i];
auto *ggml_tensor = tensor_outputs[i];
if (!_tensor_outputs[i]->bind_ggml_tensor(ggml_tensor, false)) {
QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor));
return false;
@ -191,6 +191,14 @@ public:
}
}
for (auto tensor : _tensor_inputs) {
tensor->unbind_ggml_tensor();
}
for (auto tensor : _tensor_outputs) {
tensor->unbind_ggml_tensor();
}
if (error != QNN_SUCCESS) {
QNN_LOG_INFO("error = %d\n", error);
return false;

View file

@ -28,18 +28,23 @@ public:
QNN_LOG_DEBUG("create tensor %s, device: %d", _tensor_name.c_str(), device);
}
~ggml_qnn_tensor() {
if (_qnn_instance && _qnn_rpc_buffer) {
_qnn_instance->free_rpcmem(_qnn_rpc_buffer);
}
}
bool bind_ggml_tensor(ggml_tensor *tensor, bool is_input) {
if (_tensor) {
if (_tensor != tensor) {
QNN_LOG_WARN("tensor %s has been bound to another ggml tensor %s", _tensor_name.c_str(),
ggml_get_name(_tensor));
return false;
} else {
}
QNN_LOG_INFO("tensor %s already bound to same ggml tensor %s", _tensor_name.c_str(),
ggml_get_name(_tensor));
return true;
}
}
update_params_from_ggml_tensor(tensor);
Qnn_TensorType_t new_tensor_type = is_input ? QNN_TENSOR_TYPE_APP_WRITE : QNN_TENSOR_TYPE_APP_READ;
@ -55,14 +60,17 @@ public:
return false;
}
QNN_TENSOR_SET_ID(_qnn_tensor, QNN_TENSOR_GET_ID(qnn_tensor));
QNN_LOG_DEBUG("create graph tensor %s, id: %d", _tensor_name.c_str(), QNN_TENSOR_GET_ID(qnn_tensor));
}
if (should_use_mem_handle()) {
if (!_qnn_rpc_buffer) {
_qnn_rpc_buffer = alloc_rpc_mem(ggml_nbytes(tensor));
if (!_qnn_rpc_buffer) {
QNN_LOG_WARN("alloc rpc mem failed, tensor %s", _tensor_name.c_str());
return false;
}
}
QNN_LOG_DEBUG("tensor %s, use mem handle %p", _tensor_name.c_str(), QNN_TENSOR_GET_MEM_HANDLE(_qnn_tensor));
} else {
@ -107,8 +115,8 @@ public:
QNN_LOG_DEBUG("tensor %s, clear client buffer", _tensor_name.c_str());
}
QNN_LOG_DEBUG("unbind tensor: %s from ggml tensor: %s", _tensor_name.c_str(), ggml_get_name(_tensor));
_tensor = nullptr;
QNN_LOG_DEBUG("unbind tensor: %s from ggml tensor: %s", _tensor_name.c_str(), _tensor->name);
return true;
}