From ca0d999c2ab97c11174a1f30852a311038792192 Mon Sep 17 00:00:00 2001 From: hongruichen Date: Thu, 4 Jul 2024 23:32:21 +0800 Subject: [PATCH] add ggml_qnn_graph --- ggml/src/ggml-qnn/backend-ops.cpp | 5 +- ggml/src/ggml-qnn/graph.hpp | 136 ++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 ggml/src/ggml-qnn/graph.hpp diff --git a/ggml/src/ggml-qnn/backend-ops.cpp b/ggml/src/ggml-qnn/backend-ops.cpp index cde1bd248..3365e85b8 100644 --- a/ggml/src/ggml-qnn/backend-ops.cpp +++ b/ggml/src/ggml-qnn/backend-ops.cpp @@ -1,6 +1,7 @@ #include "backend-ops.hpp" +#include "graph.hpp" #include "logger.hpp" #include "tensor.hpp" #include "utils.hpp" @@ -130,7 +131,7 @@ static void ggml_qnn_add(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, Qnn_Tensor_t tensor_inputs[] = { *tensor_input0.get_qnn_tensor(), *tensor_input1.get_qnn_tensor() }; Qnn_Tensor_t tensor_outputs[] = { *tensor_output.get_qnn_tensor() }; - Qnn_OpConfig_t op_config = { (Qnn_OpConfigVersion_t)1, + Qnn_OpConfig_t op_config = { QNN_OPCONFIG_VERSION_1, .v1 = { "ggml_op_add", QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_ELEMENT_WISE_ADD, 0, qnn_params, 2, tensor_inputs, 1, tensor_outputs } }; error = qnn_raw_interface.graphAddNode(graph_handle, op_config); @@ -300,7 +301,7 @@ static void ggml_qnn_mul_mat(ggml_backend_qnn_context *ctx, const ggml_tensor *s Qnn_Tensor_t tensor_inputs[] = { *tensor_input0.get_qnn_tensor(), *tensor_input1.get_qnn_tensor() }; Qnn_Tensor_t tensor_outputs[] = { *tensor_output.get_qnn_tensor() }; - Qnn_OpConfig_t op_config = { (Qnn_OpConfigVersion_t)1, + Qnn_OpConfig_t op_config = { QNN_OPCONFIG_VERSION_1, .v1 = { "ggml_op_mul_mat", QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_MAT_MUL, 0, qnn_params, 2, tensor_inputs, 1, tensor_outputs } }; error = qnn_raw_interface.graphAddNode(graph_handle, op_config); diff --git a/ggml/src/ggml-qnn/graph.hpp b/ggml/src/ggml-qnn/graph.hpp new file mode 100644 index 000000000..f2c27aeb3 --- /dev/null +++ 
b/ggml/src/ggml-qnn/graph.hpp
@@ -0,0 +1,184 @@
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+#include "ggml-qnn.h"
+
+#include "logger.hpp"
+#include "qnn.hpp"
+
+namespace qnn {
+
+/**
+ * Wraps one QNN graph handle together with the input/output tensors bound to its node.
+ *
+ * The constructor creates the graph; a failure there is only logged, so callers must check
+ * is_valid() before use. add_nodes() adds the single, currently hard-coded, element-wise add
+ * node and finalizes the graph; execute() then runs it on the stored tensors.
+ *
+ * @tparam _InputSize  number of input tensors of the node
+ * @tparam _OutputSize number of output tensors of the node
+ */
+template <size_t _InputSize, size_t _OutputSize>
+class ggml_qnn_graph {
+public:
+    using input_tensor_array_t = std::array<Qnn_Tensor_t, _InputSize>;
+    using output_tensor_array_t = std::array<Qnn_Tensor_t, _OutputSize>;
+
+    /**
+     * Creates a QNN graph called graph_name inside qnn_context.
+     *
+     * @param graph_name      name handed to graphCreate
+     * @param device          target backend; only QNN_BACKEND_NPU receives a custom graph config
+     * @param qnn_context     context the graph is created in
+     * @param qnn_interface   QNN function table; a copy is kept for later calls
+     * @param vtcm_size_in_mb VTCM size written into the NPU graph config, unused for other devices
+     */
+    explicit ggml_qnn_graph(const std::string &graph_name, QNNBackend device, Qnn_ContextHandle_t qnn_context,
+                            QNN_INTERFACE_VER_TYPE qnn_interface, size_t vtcm_size_in_mb) :
+        _device(device), _qnn_interface(qnn_interface) {
+        QNN_LOG_INFO("graph name %s", graph_name.c_str());
+
+        Qnn_ErrorHandle_t error = QNN_SUCCESS;
+        Qnn_GraphHandle_t graph_handle = nullptr;
+        if (device == QNN_BACKEND_NPU) {
+            // TODO: fix graph config here for NPU
+            // The config structs are value-initialized so that no indeterminate field reaches graphCreate.
+            QnnHtpGraph_CustomConfig_t hvx_config = {};
+            hvx_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_NUM_HVX_THREADS;
+            hvx_config.numHvxThreads = 8;
+            QnnGraph_Config_t graph_hvx_config = {};
+            graph_hvx_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
+            graph_hvx_config.customConfig = &hvx_config;
+
+            QnnHtpGraph_CustomConfig_t dlbc_config = {};
+            dlbc_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION;
+            dlbc_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_ENABLE_DLBC;
+            dlbc_config.optimizationOption.floatValue = 1.0; // set to 0.0 to turn off DLBC
+            QnnGraph_Config_t graph_dlbc_config = {};
+            graph_dlbc_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
+            graph_dlbc_config.customConfig = &dlbc_config;
+
+            QnnHtpGraph_CustomConfig_t opt_config = {};
+            // option must be set alongside optimizationOption, exactly as for dlbc_config above.
+            opt_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION;
+            opt_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG;
+            opt_config.optimizationOption.floatValue = 1; // 1 / 3
+            QnnGraph_Config_t graph_opt_config = {};
+            graph_opt_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
+            graph_opt_config.customConfig = &opt_config;
+
+            QnnHtpGraph_CustomConfig_t vtcm_config = {};
+            vtcm_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE;
+            vtcm_config.vtcmSizeInMB = vtcm_size_in_mb;
+            QnnGraph_Config_t graph_vtcm_config = {};
+            graph_vtcm_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
+            graph_vtcm_config.customConfig = &vtcm_config;
+
+            const QnnGraph_Config_t *p_graphconfig[] = { &graph_hvx_config, &graph_dlbc_config, &graph_vtcm_config,
+                                                         &graph_opt_config, nullptr };
+            error = qnn_interface.graphCreate(qnn_context, graph_name.c_str(), p_graphconfig, &graph_handle);
+        } else {
+            error = qnn_interface.graphCreate(qnn_context, graph_name.c_str(), nullptr, &graph_handle);
+        }
+
+        if (error != QNN_SUCCESS) {
+            // _graph_handle stays null, so is_valid() reports the failure to the caller.
+            QNN_LOG_ERROR(
+                "can't create qnn graph handle with graph name %s, "
+                "error = %d\n",
+                graph_name.c_str(), error);
+            return;
+        }
+
+        QNN_LOG_INFO("create qnn graph handle with graph name %s ok\n", graph_name.c_str());
+        _graph_handle = graph_handle;
+    }
+
+    /**
+     * Binds the given tensors to the graph, adds the element-wise add node and finalizes the graph.
+     *
+     * @param tensor_inputs  input tensors of the node; copied into the object for execute()
+     * @param tensor_outputs output tensors of the node; copied into the object for execute()
+     * @return true when both graphAddNode and graphFinalize succeed, false otherwise
+     */
+    bool add_nodes(const input_tensor_array_t &tensor_inputs, const output_tensor_array_t &tensor_outputs) {
+        if (!is_valid()) {
+            QNN_LOG_ERROR("Invalid graph\n");
+            return false;
+        }
+
+        // Keep copies: the op config below and execute() both read the member arrays.
+        _tensor_inputs = tensor_inputs;
+        _tensor_outputs = tensor_outputs;
+
+        // The node is configured with zero parameters, so a null parameter list is passed with count 0.
+        Qnn_OpConfig_t op_config = { QNN_OPCONFIG_VERSION_1,
+                                     .v1 = { "ggml_op_add", QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_ELEMENT_WISE_ADD, 0,
+                                             nullptr, static_cast<uint32_t>(_tensor_inputs.size()),
+                                             _tensor_inputs.data(), static_cast<uint32_t>(_tensor_outputs.size()),
+                                             _tensor_outputs.data() } };
+        auto error = _qnn_interface.graphAddNode(_graph_handle, op_config);
+        if (error != QNN_SUCCESS) {
+            QNN_LOG_ERROR("graphAddNode.error = %d\n", error);
+            return false;
+        }
+
+        error = _qnn_interface.graphFinalize(_graph_handle, nullptr, nullptr);
+        if (error != QNN_SUCCESS) {
+            QNN_LOG_ERROR("graphFinalize.error = %d\n", error);
+            return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * Runs the graph on the tensors stored by add_nodes().
+     *
+     * @return true when graphExecute reports QNN_SUCCESS, false otherwise (including an invalid graph)
+     */
+    bool execute() {
+        if (!is_valid()) {
+            QNN_LOG_ERROR("Invalid graph\n");
+            return false;
+        }
+
+        auto error = _qnn_interface.graphExecute(_graph_handle, _tensor_inputs.data(), _tensor_inputs.size(),
+                                                 _tensor_outputs.data(), _tensor_outputs.size(), nullptr, nullptr);
+        if (_device == QNN_BACKEND_NPU && error == QNN_COMMON_ERROR_SYSTEM_COMMUNICATION) {
+            QNN_LOG_WARN("NPU crashed. SSR detected. Caused QNN graph execute error\n");
+        }
+
+        if (error != QNN_SUCCESS) {
+            QNN_LOG_ERROR("error = %d\n", error);
+            return false;
+        }
+
+        return true;
+    }
+
+    /** @return true when a non-null graph handle is held, i.e. graph creation succeeded. */
+    bool is_valid() const { return _graph_handle != nullptr; }
+
+    /** @return the raw QNN graph handle, nullptr when graph creation failed. */
+    Qnn_GraphHandle_t get_graph_handler() const { return _graph_handle; }
+
+private:
+    const QNNBackend _device;
+    const QNN_INTERFACE_VER_TYPE _qnn_interface;
+    Qnn_GraphHandle_t _graph_handle = nullptr;
+    input_tensor_array_t _tensor_inputs = {};
+    output_tensor_array_t _tensor_outputs = {};
+
+    // The object is neither copyable nor movable.
+    ggml_qnn_graph(const ggml_qnn_graph &) = delete;
+    void operator=(const ggml_qnn_graph &) = delete;
+    ggml_qnn_graph(ggml_qnn_graph &&) = delete;
+    void operator=(ggml_qnn_graph &&) = delete;
+};
+} // namespace qnn