From 3fe07eb9075e513066a07a7274b26d47a5c8ee6e Mon Sep 17 00:00:00 2001
From: hongruichen
Date: Wed, 19 Jun 2024 14:47:41 +0800
Subject: [PATCH] fix compiling error

---
 ggml-qnn.cpp           | 84 ++++++++++++++++--------------------------
 ggml-qnn/backend.hpp   |  2 +-
 ggml-qnn/qnn-types.hpp | 23 +++++++++---
 ggml-qnn/qnn.hpp       |  9 ++---
 ggml-qnn/tensor.hpp    | 10 ++---
 ggml-qnn/utils.hpp     | 13 +++++++
 6 files changed, 72 insertions(+), 69 deletions(-)

diff --git a/ggml-qnn.cpp b/ggml-qnn.cpp
index b59126067..fdbcbdafb 100644
--- a/ggml-qnn.cpp
+++ b/ggml-qnn.cpp
@@ -70,27 +70,27 @@ typedef void (*ggml_qnn_func_t)(ggml_backend_qnn_context * ctx,
 
 static struct qnn::qcom_socinfo g_qnn_soc_info_table[] = {
     /* Qualcomm SnapDragon 8 Gen 1 */
-    [SM8450] = {
-        .soc_model = SM8450,
-        .htp_arch = V69,
+    [qnn::SM8450] = {
+        .soc_model = qnn::SM8450,
+        .htp_arch = qnn::V69,
         .vtcm_size_in_mb = 8},
 
     /* Qualcomm SnapDragon 8 Gen 1+ */
-    [SM8475] = {
-        .soc_model = SM8475,
-        .htp_arch = V69,
+    [qnn::SM8475] = {
+        .soc_model = qnn::SM8475,
+        .htp_arch = qnn::V69,
         .vtcm_size_in_mb = 8},
 
     /* Qualcomm SnapDragon 8 Gen 2 */
-    [SM8550] = {
-        .soc_model = SM8550,
-        .htp_arch = V73,
+    [qnn::SM8550] = {
+        .soc_model = qnn::SM8550,
+        .htp_arch = qnn::V73,
         .vtcm_size_in_mb = 8},
 
     /* Qualcomm SnapDragon 8 Gen 3 */
-    [SM8650] = {
-        .soc_model = SM8650,
-        .htp_arch = V75,
+    [qnn::SM8650] = {
+        .soc_model = qnn::SM8650,
+        .htp_arch = qnn::V75,
         .vtcm_size_in_mb = 8},
 };
@@ -198,19 +198,6 @@ static const char * qnn_opname_from_ggmlop(enum ggml_op ggmlop) {
     return nullptr;
 }
 
-static uint32_t qnn_get_ggml_tensor_data_size(const ggml_tensor * tensor) {
-    /*
-    size_t data_size = ggml_row_size(tensor->type, tensor->ne[0]);
-    size_t n_dims = qnn_get_ggml_tensor_rank(tensor);
-    for (int i = 1; i < n_dims; i++) {
-        data_size *= tensor->ne[i];
-    }
-
-    return data_size;
-    */
-    return ggml_nbytes(tensor);
-}
-
 static bool qnn_is_valid_params(ggml_backend_qnn_context * ctx, const ggml_tensor * src0,
                                 const ggml_tensor * src1, ggml_tensor * dst) {
     if ((nullptr == ctx) || (nullptr == src0) || (nullptr == src1) || (nullptr == dst)) {
@@ -218,10 +205,10 @@ static bool qnn_is_valid_params(ggml_backend_qnn_context * ctx, const ggml_tenso
         return false;
     }
 
-    qnn_instance * instance = nullptr;
-    Qnn_Tensor_t * tensor_0 = nullptr;
-    Qnn_Tensor_t * tensor_1 = nullptr;
-    Qnn_Tensor_t * tensor_2 = nullptr;
+    qnn_internal::qnn_instance *instance = nullptr;
+    Qnn_Tensor_t *tensor_0 = nullptr;
+    Qnn_Tensor_t *tensor_1 = nullptr;
+    Qnn_Tensor_t *tensor_2 = nullptr;
     tensor_0 = (Qnn_Tensor_t *) src0->extra;
     tensor_1 = (Qnn_Tensor_t *) src1->extra;
     tensor_2 = (Qnn_Tensor_t *) dst->extra;
@@ -283,13 +270,6 @@ public:
 };
 #endif
 
-using pfn_qnnsaver_initialize = decltype(QnnSaver_initialize);
-using pfn_qnninterface_getproviders = decltype(QnnInterface_getProviders);
-using pfn_qnnsysteminterface_getproviders = decltype(QnnSystemInterface_getProviders);
-
-#define RPCMEM_DEFAULT_FLAGS 1
-#define RPCMEM_HEAP_ID_SYSTEM 25
-
 #define VALIDATE(value, status) \
     do {                        \
         status = value;         \
@@ -625,11 +605,11 @@ static void ggml_qnn_add(ggml_backend_qnn_context * ctx, const ggml_tensor * src
                          const ggml_tensor * src1, ggml_tensor * dst) {
     Qnn_ErrorHandle_t error = QNN_SUCCESS;
     bool graph_initialized = false;
-    qnn_instance * instance = nullptr;
-    std::string graph_name = "ggml_op_qnn_add";
-    Qnn_GraphHandle_t graph_handle = nullptr;
-    Qnn_Param_t qnn_params[] = {};
-    enum ggml_op ggmlop = GGML_OP_ADD;
+    qnn_internal::qnn_instance *instance = nullptr;
+    std::string graph_name = "ggml_op_qnn_add";
"ggml_op_qnn_add"; + Qnn_GraphHandle_t graph_handle = nullptr; + Qnn_Param_t qnn_params[] = {}; + enum ggml_op ggmlop = GGML_OP_ADD; CHECK_PARAMS(ctx, src0, src1, dst); instance = ctx->instance; @@ -817,13 +797,13 @@ failure: static void ggml_qnn_mul_mat(ggml_backend_qnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - Qnn_ErrorHandle_t error = QNN_SUCCESS; - bool graph_initialized = false; - qnn_instance * instance = nullptr; - std::string graph_name = "ggml_op_qnn_mul_mat"; - Qnn_GraphHandle_t graph_handle = nullptr; - Qnn_Param_t qnn_params[] = {}; - enum ggml_op ggmlop = GGML_OP_MUL_MAT; + Qnn_ErrorHandle_t error = QNN_SUCCESS; + bool graph_initialized = false; + qnn_internal::qnn_instance *instance = nullptr; + std::string graph_name = "ggml_op_qnn_mul_mat"; + Qnn_GraphHandle_t graph_handle = nullptr; + Qnn_Param_t qnn_params[] = {}; + enum ggml_op ggmlop = GGML_OP_MUL_MAT; CHECK_PARAMS(ctx, src0, src1, dst); instance = ctx->instance; @@ -1492,8 +1472,9 @@ GGML_CALL static void ggml_backend_qnn_free(ggml_backend_t backend) { ggml_backend_qnn_context * ctx = (ggml_backend_qnn_context *) backend->context; QNN_LOG_INFO("idx %d, name:%s", ctx->device, g_qnn_mgr[ctx->device].name); - qnn_instance * instance = (qnn_instance *)g_qnn_mgr[ctx->device].instance; + auto *instance = g_qnn_mgr[ctx->device].instance; if (instance != nullptr) { + // TODO: this should be done inside the destructor std::map>::iterator graph_it; @@ -1721,9 +1702,8 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) { } } - qnn_instance * instance = nullptr; - instance = new qnn_instance(qnn_lib_path, g_qnn_mgr[device].lib, ""); - result = instance->qnn_init(nullptr); + auto *instance = new qnn_internal::qnn_instance(qnn_lib_path, g_qnn_mgr[device].lib, ""); + result = instance->qnn_init(nullptr); if (0 != result) { QNN_LOG_WARN( "init qnn subsystem failed with qnn backend %s, pls check why\n", diff --git a/ggml-qnn/backend.hpp b/ggml-qnn/backend.hpp index 3a624eab0..fd40d8ad2 100644 --- a/ggml-qnn/backend.hpp +++ b/ggml-qnn/backend.hpp @@ -20,5 +20,5 @@ struct ggml_backend_qnn_context { struct ggml_backend* backend; QNN_INTERFACE_VER_TYPE raw_interface; QNN_SYSTEM_INTERFACE_VER_TYPE raw_system_interface; - struct qcom_socinfo socinfo; + qnn::qcom_socinfo socinfo; }; diff --git a/ggml-qnn/qnn-types.hpp b/ggml-qnn/qnn-types.hpp index 33f468eb7..db1d592f0 100644 --- a/ggml-qnn/qnn-types.hpp +++ b/ggml-qnn/qnn-types.hpp @@ -1,6 +1,12 @@ #pragma once +#include "QnnTypes.h" +#include "QnnCommon.h" +#include "QnnInterface.h" +#include "Saver/QnnSaver.h" +#include "System/QnnSystemInterface.h" + namespace qnn { // ================================================================================================= // @@ -30,17 +36,24 @@ namespace qnn { SM8650 = 57, // v75 }; + struct qcom_socinfo { + uint32_t soc_model; + size_t htp_arch; + size_t vtcm_size_in_mb; + }; + using pfn_rpc_mem_init = void (*)(void); using pfn_rpc_mem_deinit = void (*)(void); using pfn_rpc_mem_alloc = void* (*) (int, uint32_t, int); using pfn_rpc_mem_free = void (*)(void*); using pfn_rpc_mem_to_fd = int (*)(void*); - struct qcom_socinfo { - uint32_t soc_model; - size_t htp_arch; - size_t vtcm_size_in_mb; - }; + using pfn_qnnsaver_initialize = decltype(QnnSaver_initialize); + using pfn_qnninterface_getproviders = decltype(QnnInterface_getProviders); + using pfn_qnnsysteminterface_getproviders = decltype(QnnSystemInterface_getProviders); } #define QNN_VER_PTR(x) (&((x).v1)) // TODO: 
+
+#define RPCMEM_DEFAULT_FLAGS 1
+#define RPCMEM_HEAP_ID_SYSTEM 25
diff --git a/ggml-qnn/qnn.hpp b/ggml-qnn/qnn.hpp
index 15df7dcbb..8d8ab72b4 100644
--- a/ggml-qnn/qnn.hpp
+++ b/ggml-qnn/qnn.hpp
@@ -11,9 +11,6 @@
 #include "QnnGraph.h"
 #include "QnnProperty.h"
 #include "QnnTensor.h"
-#include "QnnInterface.h"
-#include "Saver/QnnSaver.h"
-#include "System/QnnSystemInterface.h"
 #include "HTP/QnnHtpDevice.h"
 #include "HTP/QnnHtpGraph.h"
 
@@ -864,7 +861,7 @@ namespace qnn_internal {
             return _qnn_mem_set.count(buf) != 0U;
         }
 
-        const qnn::qcom_socinfo &get_soc_info() { return _soc_info; }
+        const qnn::qcom_socinfo& get_soc_info() { return _soc_info; }
 
     public:
         std::map<std::string,
@@ ... @@ namespace qnn_internal {
         auto get_providers =
-            reinterpret_cast<pfn_qnnsysteminterface_getproviders *>(
+            reinterpret_cast<qnn::pfn_qnnsysteminterface_getproviders *>(
                 dlsym(_system_lib_handle, "QnnSystemInterface_getProviders"));
         if (nullptr == get_providers) {
             QNN_LOG_WARN(
@@ -988,7 +985,7 @@ namespace qnn_internal {
         }
 
         auto get_providers =
-            load_qnn_functionpointers<pfn_qnninterface_getproviders *>(
+            qnn::load_qnn_functionpointers<qnn::pfn_qnninterface_getproviders *>(
                 lib_handle, "QnnInterface_getProviders");
         if (nullptr == get_providers) {
             QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %s", dlerror());
diff --git a/ggml-qnn/tensor.hpp b/ggml-qnn/tensor.hpp
index 514061146..687ebd890 100644
--- a/ggml-qnn/tensor.hpp
+++ b/ggml-qnn/tensor.hpp
@@ -45,7 +45,7 @@ namespace qnn {
         QNN_VER_PTR(*_qnn_tensor)->dataType = qnn_data_type;
 
         if (is_npu) {
-            qnn_instance* instance = ctx->instance;
+            auto* instance = ctx->instance;
             uint8_t* qnn_buffer = static_cast<uint8_t*>(
                 instance->alloc_rpcmem(ggml_nbytes(tensor), alignof(void*)));
             if (!qnn_buffer) {
@@ -68,7 +68,7 @@ namespace qnn {
         }
         else {
             QNN_VER_PTR(*_qnn_tensor)->clientBuf = {
-                tensor->data, qnn_get_ggml_tensor_data_size(tensor) };
+                tensor->data, get_ggml_tensor_data_size(tensor) };
         }
     }
 
@@ -76,7 +76,7 @@ namespace qnn {
                      ggml_backend_qnn_context* ctx) :
         _tensor(tensor), _qnn_tensor(qnn_tensor), _context(ctx) {
         _old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions;
-        const auto qnn_data_type = qnn_datatype_from_ggml_datatype(tensor->type);
+        const auto qnn_data_type = qnn::datatype_from_ggml_datatype(tensor->type);
         const bool is_npu = ctx->device == QNN_BACKEND_NPU;
 
         _dimensions[0] = (uint32_t)tensor->ne[0];
@@ -84,7 +84,7 @@ namespace qnn {
         _dimensions[2] = (uint32_t)tensor->ne[2];
         _dimensions[3] = (uint32_t)tensor->ne[3];
         QNN_VER_PTR(*_qnn_tensor)->dimensions = _dimensions;
-        QNN_VER_PTR(*_qnn_tensor)->rank = qnn_get_ggml_tensor_rank(tensor);
+        QNN_VER_PTR(*_qnn_tensor)->rank = get_ggml_tensor_rank(tensor);
         QNN_VER_PTR(*_qnn_tensor)->dataType = qnn_data_type;
 
         if (is_npu) {
@@ -104,7 +104,7 @@ namespace qnn {
         }
         else {
             QNN_VER_PTR(*_qnn_tensor)->clientBuf = {
-                tensor->data, qnn_get_ggml_tensor_data_size(tensor) };
+                tensor->data, get_ggml_tensor_data_size(tensor) };
         }
     }
 
diff --git a/ggml-qnn/utils.hpp b/ggml-qnn/utils.hpp
index 4141c4e33..c952e8bc2 100644
--- a/ggml-qnn/utils.hpp
+++ b/ggml-qnn/utils.hpp
@@ -96,4 +96,17 @@ namespace qnn {
                 offset % static_cast<intptr_t>(alignment));
     }
 
+    uint32_t get_ggml_tensor_data_size(const ggml_tensor* tensor) {
+        /*
+        size_t data_size = ggml_row_size(tensor->type, tensor->ne[0]);
+        size_t n_dims = qnn_get_ggml_tensor_rank(tensor);
+        for (int i = 1; i < n_dims; i++) {
+            data_size *= tensor->ne[i];
+        }
+
+        return data_size;
+        */
+        return ggml_nbytes(tensor);
+    }
+
 }
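
Usage note (a minimal sketch, not part of the patch): after this change the ggml helper functions live in namespace qnn, so code outside that namespace must qualify them explicitly; tensor.hpp calls them unqualified only because it is itself inside namespace qnn. The wrapper below is hypothetical and assumes get_ggml_tensor_rank() is also declared in ggml-qnn/utils.hpp:

    #include "ggml.h"
    #include "QnnTypes.h"
    #include "ggml-qnn/qnn-types.hpp" // QNN_VER_PTR
    #include "ggml-qnn/utils.hpp"     // qnn::get_ggml_tensor_* helpers

    // Hypothetical caller, for illustration only.
    static void set_client_buf(Qnn_Tensor_t &qnn_tensor, const ggml_tensor *t) {
        QNN_VER_PTR(qnn_tensor)->rank = qnn::get_ggml_tensor_rank(t);
        // get_ggml_tensor_data_size() currently just forwards to ggml_nbytes()
        QNN_VER_PTR(qnn_tensor)->clientBuf = { t->data,
                                               qnn::get_ggml_tensor_data_size(t) };
    }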