From 58cec140920985f2e038512001869fdb3cf86ad8 Mon Sep 17 00:00:00 2001
From: hongruichen
Date: Fri, 5 Jul 2024 17:31:22 +0800
Subject: [PATCH] reformat

---
 ggml/src/ggml-qnn.cpp       | 453 ++++++++++++++++--------------------
 ggml/src/ggml-qnn/utils.hpp |   2 +-
 2 files changed, 203 insertions(+), 252 deletions(-)

diff --git a/ggml/src/ggml-qnn.cpp b/ggml/src/ggml-qnn.cpp
index 19c970c5f..a590dd5f5 100644
--- a/ggml/src/ggml-qnn.cpp
+++ b/ggml/src/ggml-qnn.cpp
@@ -1,46 +1,46 @@
+#include "ggml-qnn.h"
+
+#include
 #include
 #include
-#include
 #include
+#include
 #include
 #include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
-
-#include "ggml-qnn.h"
+#include
 
 #include "ggml-backend-impl.h"
-#include "ggml-qnn/logger.hpp"
-#include "ggml-qnn/utils.hpp"
-#include "ggml-qnn/tensor.hpp"
-#include "ggml-qnn/backend.hpp"
 #include "ggml-qnn/backend-ops.hpp"
+#include "ggml-qnn/backend.hpp"
+#include "ggml-qnn/logger.hpp"
+#include "ggml-qnn/tensor.hpp"
+#include "ggml-qnn/utils.hpp"
 
 // =================================================================================================
 //
 // forward declaration
 //
 // =================================================================================================
-static int free_qnn_tensor(Qnn_Tensor_t & tensor);
+static int free_qnn_tensor(Qnn_Tensor_t &tensor);
 
 // =================================================================================================
 //
@@ -48,37 +48,25 @@ static int free_qnn_tensor(Qnn_Tensor_t & tensor);
 //
 // =================================================================================================
 #ifdef NDEBUG
-#define ENABLE_QNNBACKEND_PERF 0 // enable/disable op's perf info
+#define ENABLE_QNNBACKEND_PERF 0  // enable/disable op's perf info
 #else
-#define ENABLE_QNNBACKEND_PERF 1 // enable/disable op's perf info
+#define ENABLE_QNNBACKEND_PERF 1  // enable/disable op's perf info
 #endif
 
-#define QNN_BACKEND_NAME "qnn"
+#define QNN_BACKEND_NAME "qnn"
 
 static struct qnn::qcom_socinfo g_qnn_soc_info_table[] = {
-    /* Qualcomm SnapDragon 8 Gen 1 */
-    [qnn::SM8450] = {
-        .soc_model = qnn::SM8450,
-        .htp_arch = qnn::V69,
-        .vtcm_size_in_mb = 8},
+    /* Qualcomm SnapDragon 8 Gen 1 */
+    [qnn::SM8450] = { .soc_model = qnn::SM8450, .htp_arch = qnn::V69, .vtcm_size_in_mb = 8 },
 
-    /* Qualcomm SnapDragon 8 Gen 1+ */
-    [qnn::SM8475] = {
-        .soc_model = qnn::SM8475,
-        .htp_arch = qnn::V69,
-        .vtcm_size_in_mb = 8},
+    /* Qualcomm SnapDragon 8 Gen 1+ */
+    [qnn::SM8475] = { .soc_model = qnn::SM8475, .htp_arch = qnn::V69, .vtcm_size_in_mb = 8 },
 
-    /* Qualcomm SnapDragon 8 Gen 2 */
-    [qnn::SM8550] = {
-        .soc_model = qnn::SM8550,
-        .htp_arch = qnn::V73,
-        .vtcm_size_in_mb = 8},
+    /* Qualcomm SnapDragon 8 Gen 2 */
+    [qnn::SM8550] = { .soc_model = qnn::SM8550, .htp_arch = qnn::V73, .vtcm_size_in_mb = 8 },
 
-    /* Qualcomm SnapDragon 8 Gen 3 */
-    [qnn::SM8650] = {
-        .soc_model = qnn::SM8650,
-        .htp_arch = qnn::V75,
-        .vtcm_size_in_mb = 8},
+    /* Qualcomm SnapDragon 8 Gen 3 */
+    [qnn::SM8650] = { .soc_model = qnn::SM8650, .htp_arch = qnn::V75, .vtcm_size_in_mb = 8 },
 
 };
 
@@ -96,52 +84,50 @@ static struct qnn::qcom_socinfo g_qnn_soc_info_table[] = {
 // HMX(Hexagon Matrix eXtensions)/HTA(Hexagon Tensor Accelerator)
 static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
-    [QNN_BACKEND_CPU] = {.device = 0,
-                         .threads = 1,
-                         .name = "qnn-cpu",
-                         .lib = "libQnnCpu.so",
-                         .instance = nullptr,
-                         .backend = nullptr,
-                         .raw_interface = {},
-                         .raw_system_interface = {},
-                         .socinfo = {}},
+    [QNN_BACKEND_CPU] = { .device = 0,
+                          .threads = 1,
+                          .name = "qnn-cpu",
+                          .lib = "libQnnCpu.so",
+                          .instance = nullptr,
+                          .backend = nullptr,
+                          .raw_interface = {},
+                          .raw_system_interface = {},
+                          .socinfo = {} },
 
-    [QNN_BACKEND_GPU] = {.device = 1,
-                         .threads = 1,
-                         .name = "qnn-gpu",
-                         .lib = "libQnnGpu.so",
-                         .instance = nullptr,
-                         .backend = nullptr,
-                         .raw_interface = {},
-                         .raw_system_interface = {},
-                         .socinfo = {}},
+    [QNN_BACKEND_GPU] = { .device = 1,
+                          .threads = 1,
+                          .name = "qnn-gpu",
+                          .lib = "libQnnGpu.so",
+                          .instance = nullptr,
+                          .backend = nullptr,
+                          .raw_interface = {},
+                          .raw_system_interface = {},
+                          .socinfo = {} },
 
-    [QNN_BACKEND_NPU] = {.device = 2,
-                         .threads = 1,
-                         .name = "qnn-npu",
-                         .lib = "libQnnHtp.so",
-                         .instance = nullptr,
-                         .backend = nullptr,
-                         .raw_interface = {},
-                         .raw_system_interface = {},
-                         .socinfo = {}},
+    [QNN_BACKEND_NPU] = { .device = 2,
+                          .threads = 1,
+                          .name = "qnn-npu",
+                          .lib = "libQnnHtp.so",
+                          .instance = nullptr,
+                          .backend = nullptr,
+                          .raw_interface = {},
+                          .raw_system_interface = {},
+                          .socinfo = {} },
 };
 
 struct ggml_backend_qnn_buffer_context {
-    ggml_backend_qnn_buffer_context(size_t device)
-        : device(device)
-        , name(QNN_BACKEND_NAME + std::to_string(device)) {}
+    ggml_backend_qnn_buffer_context(size_t device) : device(device), name(QNN_BACKEND_NAME + std::to_string(device)) {}
 
     ~ggml_backend_qnn_buffer_context() {
         if (buffer) {
             free(buffer);
         }
 
-        for (auto * sub_buffer : sub_buffers) {
+        for (auto *sub_buffer : sub_buffers) {
             free(sub_buffer);
         }
 
-        for (auto * qnn_tensor : qnn_tensors) {
+        for (auto *qnn_tensor : qnn_tensors) {
             free_qnn_tensor(*qnn_tensor);
             free(qnn_tensor);
         }
@@ -149,19 +135,19 @@ struct ggml_backend_qnn_buffer_context {
         sub_buffers.clear();
         qnn_tensors.clear();
     }
-    void * buffer = nullptr;
+    void *buffer = nullptr;
 
-    struct ggml_backend_qnn_context * backend_ctx = nullptr;
+    struct ggml_backend_qnn_context *backend_ctx = nullptr;
 
-    size_t buffer_size = 0;
-    std::vector sub_buffers;
+    size_t buffer_size = 0;
+    std::vector sub_buffers;
     std::vector qnn_tensors;
-    size_t device;
-    std::string name;
+    size_t device;
+    std::string name;
 };
 
 struct ggml_backend_qnn_buffer_type_context {
-    size_t device;
+    size_t device;
     std::string name;
 };
 
@@ -170,7 +156,7 @@ struct ggml_backend_qnn_buffer_type_context {
 // QNN backend internal helper functions
 //
 // =================================================================================================
-static size_t memscpy(void * dst, size_t dst_size, const void * src, size_t copy_size) {
+static size_t memscpy(void *dst, size_t dst_size, const void *src, size_t copy_size) {
     if (!dst || !src || !dst_size || !copy_size) return 0;
 
     size_t min_size = dst_size < copy_size ? dst_size : copy_size;
@@ -180,13 +166,12 @@ static size_t memscpy(void * dst, size_t dst_size, const void * src, size_t copy
     return min_size;
 }
 
-static int deep_copy_qnn_tensors(Qnn_Tensor_t & src, Qnn_Tensor_t & dst) {
+static int deep_copy_qnn_tensors(Qnn_Tensor_t &src, Qnn_Tensor_t &dst) {
     int err = 0;
     VALIDATE_TENSOR_VERSION(src, err);
 
     dst.version = src.version;
-    QNN_TENSOR_SET_NAME(
-        dst, ::strndup(QNN_TENSOR_GET_NAME(src),std::string(QNN_TENSOR_GET_NAME(src)).size()));
+    QNN_TENSOR_SET_NAME(dst, ::strndup(QNN_TENSOR_GET_NAME(src), std::string(QNN_TENSOR_GET_NAME(src)).size()));
     if (nullptr == QNN_TENSOR_GET_NAME(dst)) {
         return 1;
     }
@@ -197,7 +182,7 @@ static int deep_copy_qnn_tensors(Qnn_Tensor_t & src, Qnn_Tensor_t & dst) {
     QNN_TENSOR_SET_MEM_TYPE(dst, QNN_TENSOR_GET_MEM_TYPE(src));
 
     if (QNN_TENSOR_GET_MEM_TYPE(src) == QNN_TENSORMEMTYPE_RAW) {
-        Qnn_ClientBuffer_t client_buf = {nullptr, 0};
+        Qnn_ClientBuffer_t client_buf = { nullptr, 0 };
         QNN_TENSOR_SET_CLIENT_BUF(dst, client_buf);
     } else if (QNN_TENSOR_GET_MEM_TYPE(src) == QNN_TENSORMEMTYPE_MEMHANDLE) {
         QNN_TENSOR_SET_MEM_HANDLE(dst, nullptr);
@@ -205,33 +190,29 @@
         return 1;
     }
 
-    Qnn_QuantizeParams_t src_qparam = QNN_TENSOR_GET_QUANT_PARAMS(src);
-    Qnn_QuantizationEncoding_t encoding = src_qparam.quantizationEncoding;
+    Qnn_QuantizeParams_t src_qparam = QNN_TENSOR_GET_QUANT_PARAMS(src);
+    Qnn_QuantizationEncoding_t encoding = src_qparam.quantizationEncoding;
     if (encoding == QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) {
-        Qnn_QuantizeParams_t src_qparam_cpy = src_qparam;
-        Qnn_AxisScaleOffset_t & axis_scale_offset = src_qparam_cpy.axisScaleOffsetEncoding;
-        Qnn_ScaleOffset_t ** scaleOffset = & axis_scale_offset.scaleOffset;
-        size_t scaleOffsetSize = axis_scale_offset.numScaleOffsets * sizeof(Qnn_ScaleOffset_t);
-        *scaleOffset = (Qnn_ScaleOffset_t *) malloc(scaleOffsetSize);
-        memscpy(*scaleOffset, scaleOffsetSize,
-                src_qparam.axisScaleOffsetEncoding.scaleOffset,
-                scaleOffsetSize);
+        Qnn_QuantizeParams_t src_qparam_cpy = src_qparam;
+        Qnn_AxisScaleOffset_t &axis_scale_offset = src_qparam_cpy.axisScaleOffsetEncoding;
+        Qnn_ScaleOffset_t **scaleOffset = &axis_scale_offset.scaleOffset;
+        size_t scaleOffsetSize = axis_scale_offset.numScaleOffsets * sizeof(Qnn_ScaleOffset_t);
+        *scaleOffset = (Qnn_ScaleOffset_t *)malloc(scaleOffsetSize);
+        memscpy(*scaleOffset, scaleOffsetSize, src_qparam.axisScaleOffsetEncoding.scaleOffset, scaleOffsetSize);
         QNN_TENSOR_SET_QUANT_PARAMS(dst, src_qparam_cpy);
     } else if (encoding == QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET) {
-        Qnn_QuantizeParams_t src_qparam_cpy = src_qparam;
-        Qnn_BwAxisScaleOffset_t & bwaxis_scale_offset = src_qparam_cpy.bwAxisScaleOffsetEncoding;
-        size_t scaleSize = bwaxis_scale_offset.numElements * sizeof(float);
-        float ** scales = &bwaxis_scale_offset.scales;
-        int32_t ** offsets = &bwaxis_scale_offset.offsets;
-        *scales = (float *) malloc(scaleSize);
-        memscpy(*scales, scaleSize, src_qparam.bwAxisScaleOffsetEncoding.scales,
-                scaleSize);
+        Qnn_QuantizeParams_t src_qparam_cpy = src_qparam;
+        Qnn_BwAxisScaleOffset_t &bwaxis_scale_offset = src_qparam_cpy.bwAxisScaleOffsetEncoding;
+        size_t scaleSize = bwaxis_scale_offset.numElements * sizeof(float);
+        float **scales = &bwaxis_scale_offset.scales;
+        int32_t **offsets = &bwaxis_scale_offset.offsets;
+        *scales = (float *)malloc(scaleSize);
+        memscpy(*scales, scaleSize, src_qparam.bwAxisScaleOffsetEncoding.scales, scaleSize);
 
         if (bwaxis_scale_offset.offsets != nullptr) {
             size_t offsetSize = bwaxis_scale_offset.numElements * sizeof(int32_t);
-            *offsets = (int32_t *) malloc(offsetSize);
-            memscpy(*offsets, offsetSize,
-                    src_qparam.bwAxisScaleOffsetEncoding.offsets, offsetSize);
+            *offsets = (int32_t *)malloc(offsetSize);
+            memscpy(*offsets, offsetSize, src_qparam.bwAxisScaleOffsetEncoding.offsets, offsetSize);
         }
         QNN_TENSOR_SET_QUANT_PARAMS(dst, src_qparam_cpy);
     } else {
@@ -240,12 +221,13 @@ static int deep_copy_qnn_tensors(Qnn_Tensor_t & src, Qnn_Tensor_t & dst) {
 
     uint32_t rank = QNN_TENSOR_GET_RANK(src);
     QNN_TENSOR_SET_RANK(dst, rank);
-    size_t dim_size = rank * sizeof(uint32_t);
-    uint32_t * dimensions = (uint32_t *) malloc(dim_size);
+    size_t dim_size = rank * sizeof(uint32_t);
+    uint32_t *dimensions = (uint32_t *)malloc(dim_size);
     if (dimensions == nullptr) {
-        QNN_LOG_WARN("deep_copy_qnn_tensors() allocation error while copying "
-                     "tensor %s\n",
-                     QNN_TENSOR_GET_NAME(src));
+        QNN_LOG_WARN(
+            "deep_copy_qnn_tensors() allocation error while copying "
+            "tensor %s\n",
+            QNN_TENSOR_GET_NAME(src));
         return 1;
     }
     memscpy(dimensions, dim_size, QNN_TENSOR_GET_DIMENSIONS(src), dim_size);
@@ -254,11 +236,11 @@ static int deep_copy_qnn_tensors(Qnn_Tensor_t & src, Qnn_Tensor_t & dst) {
     return err;
 }
 
-static int free_qnn_tensor(Qnn_Tensor_t & tensor) {
+static int free_qnn_tensor(Qnn_Tensor_t &tensor) {
     int err = 0;
     VALIDATE_TENSOR_VERSION(tensor, err);
 
-    free((void *) QNN_TENSOR_GET_NAME(tensor));
+    free((void *)QNN_TENSOR_GET_NAME(tensor));
     free(QNN_TENSOR_GET_DIMENSIONS(tensor));
 
     return err;
@@ -269,15 +251,14 @@
 // implementation of QNN backend for GGML
 //
 // =================================================================================================
-static bool ggml_qnn_can_handle_op(ggml_backend_qnn_context * ctx,
-                                   const struct ggml_tensor * tensor,
+static bool ggml_qnn_can_handle_op(ggml_backend_qnn_context *ctx, const struct ggml_tensor *tensor,
                                    bool b_dump_tensor_info) {
     if (ggml_is_empty(tensor) || !qnn::ggml_qnn_op_array()[tensor->op]) {
         return false;
     }
 
-    const struct ggml_tensor * src0 = tensor->src[0];
-    const struct ggml_tensor * src1 = tensor->src[1];
+    const struct ggml_tensor *src0 = tensor->src[0];
+    const struct ggml_tensor *src1 = tensor->src[1];
     if (nullptr == src0 || nullptr == src1) {
         return false;
     }
@@ -304,7 +285,7 @@ static bool ggml_qnn_can_handle_op(ggml_backend_qnn_context * ctx,
         return false;
     }
 
-    //TODO: support other quantized data type
+    // TODO: support other quantized data type
     if (ggml_is_quantized(src0->type)) {
         if (src0->type != GGML_TYPE_Q8_0 && src0->type != GGML_TYPE_Q4_0) {
             return false;
@@ -313,15 +294,15 @@ static bool ggml_qnn_can_handle_op(ggml_backend_qnn_context * ctx,
 
     if (tensor->op == GGML_OP_MUL_MAT) {
         if (ne00 <= 32 || ne01 <= 32 || ne10 <= 32 || ne11 <= 32) {
-            //comment it for make UT of mul_mat with QNN RPC happy
-            //return false;
+            // comment it for make UT of mul_mat with QNN RPC happy
+            // return false;
         }
     }
 
     return true;
 }
 
-bool ggml_qnn_compute_forward(ggml_backend_qnn_context * ctx, struct ggml_tensor * tensor) {
+bool ggml_qnn_compute_forward(ggml_backend_qnn_context *ctx, struct ggml_tensor *tensor) {
     auto func = qnn::ggml_qnn_op_array()[tensor->op];
     if (!func) {
         QNN_LOG_WARN("unsupported op %d", tensor->op);
@@ -332,7 +313,7 @@ bool ggml_qnn_compute_forward(ggml_backend_qnn_context * ctx, struct ggml_tensor
     return true;
 }
 
-static const char * ggml_backend_qnn_buffer_get_name(ggml_backend_buffer_t buffer) {
+static const char *ggml_backend_qnn_buffer_get_name(ggml_backend_buffer_t buffer) {
     GGML_UNUSED(buffer);
     return "QNN";
 }
@@ -342,31 +323,28 @@ GGML_CALL static bool ggml_backend_buffer_is_qnn(ggml_backend_buffer_t buffer) {
 }
 
 GGML_CALL static void ggml_backend_qnn_buffer_free_buffer(ggml_backend_buffer_t buffer) {
-    ggml_backend_qnn_buffer_context * ctx = (ggml_backend_qnn_buffer_context *) buffer->context;
+    ggml_backend_qnn_buffer_context *ctx = (ggml_backend_qnn_buffer_context *)buffer->context;
 
     delete ctx;
 }
 
-GGML_CALL static void * ggml_backend_qnn_buffer_get_base(ggml_backend_buffer_t buffer) {
-    ggml_backend_qnn_buffer_context * ctx = (ggml_backend_qnn_buffer_context *) buffer->context;
+GGML_CALL static void *ggml_backend_qnn_buffer_get_base(ggml_backend_buffer_t buffer) {
+    ggml_backend_qnn_buffer_context *ctx = (ggml_backend_qnn_buffer_context *)buffer->context;
 
     return ctx->buffer;
 }
 
-GGML_CALL static void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t buffer,
-                                                          ggml_tensor * tensor) {
-    Qnn_ErrorHandle_t error = QNN_SUCCESS;
-    ggml_backend_qnn_buffer_context * ctx = (ggml_backend_qnn_buffer_context *) buffer->context;
+GGML_CALL static void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor) {
+    Qnn_ErrorHandle_t error = QNN_SUCCESS;
+    ggml_backend_qnn_buffer_context *ctx = (ggml_backend_qnn_buffer_context *)buffer->context;
 
-    static int idx = 0;
-    char tensor_name[GGML_MAX_NAME] = {0};
+    static int idx = 0;
+    char tensor_name[GGML_MAX_NAME] = { 0 };
     snprintf(tensor_name, GGML_MAX_NAME, "tensor_%04d", idx++);
 
-    uint32_t dimensions[] = {(uint32_t) tensor->ne[0], (uint32_t) tensor->ne[1],
-                             (uint32_t) tensor->ne[2],
-                             (uint32_t) tensor->ne[3]};
-    Qnn_DataType_t qnn_data_type =
-        qnn::datatype_from_ggml_datatype(tensor->type);
+    uint32_t dimensions[] = { (uint32_t)tensor->ne[0], (uint32_t)tensor->ne[1], (uint32_t)tensor->ne[2],
+                              (uint32_t)tensor->ne[3] };
+    Qnn_DataType_t qnn_data_type = qnn::datatype_from_ggml_datatype(tensor->type);
     Qnn_TensorType_t qnn_tensor_type = QNN_TENSOR_TYPE_APP_WRITE;
 
     if (tensor->flags & GGML_TENSOR_FLAG_INPUT) {
@@ -381,25 +359,22 @@ GGML_CALL static void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t
         qnn_mem_type = QNN_TENSORMEMTYPE_MEMHANDLE;
     }
 
-    qnn_tensor = {
-        .version = QNN_TENSOR_VERSION_1,
-        {.v1 = {.id = 0,
-                .name = tensor_name,
-                .type = qnn_tensor_type,
-                .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER,
-                .dataType = qnn_data_type,
-                .quantizeParams =
-                    {QNN_DEFINITION_UNDEFINED,
-                     QNN_QUANTIZATION_ENCODING_UNDEFINED,
-                     {.scaleOffsetEncoding = {.scale = 0.0000000000000000f,
-                                              .offset = 0}}},
-                .rank = qnn::get_ggml_tensor_rank(tensor),
-                .dimensions = dimensions,
-                .memType = qnn_mem_type,
-                {.clientBuf = {.data = nullptr, .dataSize = 0}}}}};
+    qnn_tensor = { .version = QNN_TENSOR_VERSION_1,
+                   { .v1 = {
+                         .id = 0,
+                         .name = tensor_name,
+                         .type = qnn_tensor_type,
+                         .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER,
+                         .dataType = qnn_data_type,
+                         .quantizeParams = { QNN_DEFINITION_UNDEFINED,
+                                             QNN_QUANTIZATION_ENCODING_UNDEFINED,
+                                             { .scaleOffsetEncoding = { .scale = 0.0000000000000000f, .offset = 0 } } },
+                         .rank = qnn::get_ggml_tensor_rank(tensor),
+                         .dimensions = dimensions,
+                         .memType = qnn_mem_type,
+                         { .clientBuf = { .data = nullptr, .dataSize = 0 } } } } };
 
-    Qnn_Tensor_t * p_qnn_tensor =
-        (Qnn_Tensor_t *)calloc(1, sizeof(Qnn_Tensor_t));
+    Qnn_Tensor_t *p_qnn_tensor = (Qnn_Tensor_t *)calloc(1, sizeof(Qnn_Tensor_t));
     if (nullptr == p_qnn_tensor) {
         QNN_LOG_WARN("calloc failed");
         return;
@@ -414,24 +389,21 @@ GGML_CALL static void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t
     ctx->qnn_tensors.push_back(p_qnn_tensor);
 }
 
-GGML_CALL static void ggml_backend_qnn_buffer_set_tensor(ggml_backend_buffer_t buffer,
-                                                         ggml_tensor * tensor, const void * data,
-                                                         size_t offset, size_t size) {
+GGML_CALL static void ggml_backend_qnn_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor,
+                                                         const void *data, size_t offset, size_t size) {
     GGML_UNUSED(buffer);
 
-    memcpy((char *) tensor->data + offset, data, size);
+    memcpy((char *)tensor->data + offset, data, size);
 }
 
-GGML_CALL static void ggml_backend_qnn_buffer_get_tensor(ggml_backend_buffer_t buffer,
-                                                         const ggml_tensor * tensor, void * data,
-                                                         size_t offset, size_t size) {
+GGML_CALL static void ggml_backend_qnn_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor *tensor,
+                                                         void *data, size_t offset, size_t size) {
     GGML_UNUSED(buffer);
 
-    memcpy(data, (const char *) tensor->data + offset, size);
+    memcpy(data, (const char *)tensor->data + offset, size);
 }
 
-GGML_CALL static bool ggml_backend_qnn_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
-                                                         const struct ggml_tensor * src,
-                                                         struct ggml_tensor * dst) {
+GGML_CALL static bool ggml_backend_qnn_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor *src,
+                                                         struct ggml_tensor *dst) {
     GGML_UNUSED(buffer);
     if (ggml_backend_buffer_is_host(src->buffer)) {
         memcpy(dst->data, src->data, ggml_nbytes(src));
@@ -442,7 +414,7 @@ GGML_CALL static bool ggml_backend_qnn_buffer_cpy_tensor(ggml_backend_buffer_t b
 }
 
 GGML_CALL static void ggml_backend_qnn_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
-    ggml_backend_qnn_buffer_context * ctx = (ggml_backend_qnn_buffer_context *) buffer->context;
+    ggml_backend_qnn_buffer_context *ctx = (ggml_backend_qnn_buffer_context *)buffer->context;
 
     memset(ctx->buffer, value, ctx->buffer_size);
 }
@@ -459,13 +431,11 @@ static ggml_backend_buffer_i ggml_backend_qnn_buffer_interface = {
     /* .reset = */ nullptr,
 };
 
-GGML_CALL static const char * ggml_backend_qnn_buffer_type_name(ggml_backend_buffer_type_t buft) {
-    return "QNN";
-}
+GGML_CALL static const char *ggml_backend_qnn_buffer_type_name(ggml_backend_buffer_type_t buft) { return "QNN"; }
 
-static void * ggml_qnn_host_malloc(size_t n) {
-    void * data = nullptr;
-    int result = posix_memalign((void **) &data, sysconf(_SC_PAGESIZE), n);
+static void *ggml_qnn_host_malloc(size_t n) {
+    void *data = nullptr;
+    int result = posix_memalign((void **)&data, sysconf(_SC_PAGESIZE), n);
     if (result != 0) {
         QNN_LOG_WARN("%s: error: posix_memalign failed\n", __func__);
         return nullptr;
@@ -474,10 +444,10 @@ static void * ggml_qnn_host_malloc(size_t n) {
     return data;
 }
 
-GGML_CALL static ggml_backend_buffer_t ggml_backend_qnn_buffer_type_alloc_buffer(
-    ggml_backend_buffer_type_t buft, size_t size) {
-    ggml_backend_qnn_buffer_type_context * buft_ctx = (ggml_backend_qnn_buffer_type_context *)buft->context;
-    ggml_backend_qnn_buffer_context * ctx = new ggml_backend_qnn_buffer_context(buft_ctx->device);
+GGML_CALL static ggml_backend_buffer_t ggml_backend_qnn_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
+                                                                                 size_t size) {
+    ggml_backend_qnn_buffer_type_context *buft_ctx = (ggml_backend_qnn_buffer_type_context *)buft->context;
+    ggml_backend_qnn_buffer_context *ctx = new ggml_backend_qnn_buffer_context(buft_ctx->device);
 
     size_t size_page = sysconf(_SC_PAGESIZE);
 
@@ -487,7 +457,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_qnn_buffer_type_alloc_buffer
     }
 
     // TODO:use pre-allocated buffer in internal memory pool
-    ctx->buffer = ggml_qnn_host_malloc(size_aligned);
+    ctx->buffer = ggml_qnn_host_malloc(size_aligned);
     ctx->buffer_size = size_aligned;
 
     ctx->backend_ctx = &g_qnn_mgr[buft_ctx->device];
@@ -497,11 +467,10 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_qnn_buffer_type_alloc_buffer
         return nullptr;
     }
 
-    return ggml_backend_buffer_init(buft, ggml_backend_qnn_buffer_interface,ctx, size);
+    return ggml_backend_buffer_init(buft, ggml_backend_qnn_buffer_interface, ctx, size);
 }
 
-GGML_CALL static size_t ggml_backend_qnn_buffer_type_get_alignment(
-    ggml_backend_buffer_type_t buft) {
+GGML_CALL static size_t ggml_backend_qnn_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
     GGML_UNUSED(buft);
     return 32;
 }
@@ -518,18 +487,16 @@ GGML_CALL static bool ggml_backend_qnn_buffer_is_host(ggml_backend_buffer_type_t
     return true;
 }
 
-GGML_CALL static const char * ggml_backend_qnn_name(ggml_backend_t backend) {
-    return "QNN";
-}
+GGML_CALL static const char *ggml_backend_qnn_name(ggml_backend_t backend) { return "QNN"; }
 
 GGML_CALL static void ggml_backend_qnn_free(ggml_backend_t backend) {
     QNN_LOG_INFO("enter %s", __func__);
-    ggml_backend_qnn_context * ctx = (ggml_backend_qnn_context *) backend->context;
+    ggml_backend_qnn_context *ctx = (ggml_backend_qnn_context *)backend->context;
     QNN_LOG_INFO("idx %d, name:%s", ctx->device, g_qnn_mgr[ctx->device].name);
 
     auto *instance = g_qnn_mgr[ctx->device].instance;
     if (instance != nullptr) {
-        for (const auto &graph_item: ctx->qnn_graph_map) {
+        for (const auto &graph_item : ctx->qnn_graph_map) {
             QNN_LOG_INFO("graph type:%s", graph_item.first.c_str());
         }
 
@@ -548,21 +515,20 @@ GGML_CALL static void ggml_backend_qnn_free(ggml_backend_t backend) {
 }
 
 GGML_CALL static ggml_backend_buffer_type_t ggml_backend_qnn_get_default_buffer_type(ggml_backend_t backend) {
-    ggml_backend_qnn_context * ctx = (ggml_backend_qnn_context *) backend->context;
+    ggml_backend_qnn_context *ctx = (ggml_backend_qnn_context *)backend->context;
 
     return ggml_backend_qnn_buffer_type(ctx->device);
 }
 
-GGML_CALL static ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
-    enum ggml_status result = GGML_STATUS_SUCCESS;
-    ggml_backend_qnn_context * ctx = (ggml_backend_qnn_context *) backend->context;
+GGML_CALL static ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backend, ggml_cgraph *cgraph) {
+    enum ggml_status result = GGML_STATUS_SUCCESS;
+    ggml_backend_qnn_context *ctx = (ggml_backend_qnn_context *)backend->context;
     GGML_UNUSED(ctx);
 
     for (int i = 0; i < cgraph->n_nodes; i++) {
-        ggml_tensor * node = cgraph->nodes[i];
-        if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE ||
-            node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW ||
-            node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
+        ggml_tensor *node = cgraph->nodes[i];
+        if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE ||
+            node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
             continue;
         }
         bool ok = ggml_qnn_compute_forward(ctx, node);
@@ -574,15 +540,14 @@ GGML_CALL static ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backe
     return result;
 }
 
-GGML_CALL static bool ggml_backend_qnn_supports_op(ggml_backend_t backend,
-                                                   const ggml_tensor * op) {
-    ggml_backend_qnn_context *ctx = (ggml_backend_qnn_context *) backend->context;
+GGML_CALL static bool ggml_backend_qnn_supports_op(ggml_backend_t backend, const ggml_tensor *op) {
+    ggml_backend_qnn_context *ctx = (ggml_backend_qnn_context *)backend->context;
 
     return (ggml_qnn_can_handle_op(ctx, op, false));
 }
 
-GGML_CALL static bool ggml_backend_qnn_offload_op(ggml_backend_t backend,const ggml_tensor * tensor) {
-    ggml_backend_qnn_context * ctx = (ggml_backend_qnn_context *) backend->context;
+GGML_CALL static bool ggml_backend_qnn_offload_op(ggml_backend_t backend, const ggml_tensor *tensor) {
+    ggml_backend_qnn_context *ctx = (ggml_backend_qnn_context *)backend->context;
 
     return ggml_qnn_can_handle_op(ctx, tensor, false);
 }
@@ -611,21 +576,19 @@ static ggml_backend_i ggml_backend_qnn_interface = {
 };
 
 static ggml_guid_t ggml_backend_qnn_guid() {
-    static ggml_guid guid = {
-        0x1a, 0x2b, 0x3c, 0x4d, 0x5e, 0x6f, 0x70, 0x81,
-        0x92, 0xa3, 0xb4, 0xc5, 0xd6, 0xe7, 0xf8, 0x09
-    };
+    static ggml_guid guid = { 0x1a, 0x2b, 0x3c, 0x4d, 0x5e, 0x6f, 0x70, 0x81,
+                              0x92, 0xa3, 0xb4, 0xc5, 0xd6, 0xe7, 0xf8, 0x09 };
     return &guid;
 }
 
-static ggml_backend_t ggml_backend_qnn_reg_init(const char * params, void * user_data) {
+static ggml_backend_t ggml_backend_qnn_reg_init(const char *params, void *user_data) {
     if (nullptr == params) {
         // QNN library path
         // can be hardcoded to "/data/local/tmp/" for Android command line application
         // or specified in JNI layer for Android APK
         params = "/data/local/tmp/";
     }
-    ggml_backend_t qnn_backend = ggml_backend_qnn_init((int) (intptr_t) user_data, params);
+    ggml_backend_t qnn_backend = ggml_backend_qnn_init((int)(intptr_t)user_data, params);
 
     return qnn_backend;
 }
@@ -637,19 +600,15 @@ bool ggml_backend_is_qnn(ggml_backend_t backend) {
 
 void ggml_backend_qnn_set_n_threads(ggml_backend_t backend, int n_threads) {
     GGML_ASSERT(ggml_backend_is_qnn(backend));
 
-    auto * ctx = (ggml_backend_qnn_context *) backend->context;
+    auto *ctx = (ggml_backend_qnn_context *)backend->context;
     ctx->threads = n_threads;
 }
 
-const char * ggml_backend_qnn_get_name(ggml_backend_t backend) {
-    return backend->iface.get_name(backend);
-}
+const char *ggml_backend_qnn_get_name(ggml_backend_t backend) { return backend->iface.get_name(backend); }
 
-int ggml_backend_qnn_get_device_count() {
-    return GGML_QNN_MAX_DEVICES;
-}
+int ggml_backend_qnn_get_device_count() { return GGML_QNN_MAX_DEVICES; }
 
-void ggml_backend_qnn_get_device_description(size_t dev_num, char * description, size_t description_size) {
+void ggml_backend_qnn_get_device_description(size_t dev_num, char *description, size_t description_size) {
     if (nullptr == description || 0 == description_size) {
         QNN_LOG_WARN("invalid param");
         return;
@@ -665,9 +624,10 @@ void ggml_backend_qnn_get_device_description(size_t dev_num, char * description,
 
 ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t device) {
     if (device >= GGML_QNN_MAX_DEVICES) {
-        QNN_LOG_DEBUG("ggml_backend_qnn_buffer_type error: device_index:%d is "
-                      "out of range [0, %d]\n",
-                      device, GGML_QNN_MAX_DEVICES - 1);
+        QNN_LOG_DEBUG(
+            "ggml_backend_qnn_buffer_type error: device_index:%d is "
+            "out of range [0, %d]\n",
+            device, GGML_QNN_MAX_DEVICES - 1);
         return nullptr;
     }
 
@@ -676,17 +636,15 @@ ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t device) {
     static bool ggml_backend_qnn_buffer_type_initialized = false;
     if (!ggml_backend_qnn_buffer_type_initialized) {
         for (size_t i = 0; i < GGML_QNN_MAX_DEVICES; i++) {
-            auto & context = ggml_backend_qnn_buffer_type_contexts[i];
+            auto &context = ggml_backend_qnn_buffer_type_contexts[i];
             context = { i, std::string(QNN_BACKEND_NAME) + std::to_string(i) };
             ggml_backend_qnn_buffer_types[i] = {
-                /* .iface = */ {
-                    /* .get_name = */ ggml_backend_qnn_buffer_type_name,
-                    /* .alloc_buffer = */ ggml_backend_qnn_buffer_type_alloc_buffer,
-                    /* .get_alignment = */ ggml_backend_qnn_buffer_type_get_alignment,
-                    /* .get_max_size = */ ggml_backend_qnn_buffer_type_get_max_size,
-                    /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
-                    /* .is_host = */ ggml_backend_qnn_buffer_is_host
-                },
+                /* .iface = */ { /* .get_name = */ ggml_backend_qnn_buffer_type_name,
+                                 /* .alloc_buffer = */ ggml_backend_qnn_buffer_type_alloc_buffer,
+                                 /* .get_alignment = */ ggml_backend_qnn_buffer_type_get_alignment,
+                                 /* .get_max_size = */ ggml_backend_qnn_buffer_type_get_max_size,
+                                 /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
+                                 /* .is_host = */ ggml_backend_qnn_buffer_is_host },
                 /* .context = */ &context,
             };
         }
@@ -702,7 +660,7 @@ ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t device) {
 * @param qnn_lib_path qnn library path, such as "/data/local/tmp/" on Android or specified in JNI layer
 * @return
 */
-ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
+ggml_backend_t ggml_backend_qnn_init(size_t device, const char *qnn_lib_path) {
     int result = 0;
 
     if (nullptr == qnn_lib_path) {
@@ -729,9 +687,8 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
             QNN_LOG_ERROR("QNN NPU backend setenv failure");
         }
         if (0 == setenv("ADSP_LIBRARY_PATH",
-                        (path +
-                         ";/vendor/dsp/cdsp;/vendor/lib/rfsa/adsp;/system/lib/"
-                         "rfsa/adsp;/vendor/dsp/dsp;/vendor/dsp/images;/dsp")
+                        (path + ";/vendor/dsp/cdsp;/vendor/lib/rfsa/adsp;/system/lib/"
+                                "rfsa/adsp;/vendor/dsp/dsp;/vendor/dsp/images;/dsp")
                             .c_str(),
                         1)) {
             QNN_LOG_INFO("QNN NPU backend setenv successfully");
@@ -740,20 +697,16 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
         }
     } else {
         if (0 == setenv("LD_LIBRARY_PATH", path.c_str(), 1)) {
-            QNN_LOG_INFO("%s backend setenv successfully\n",
-                         qnn::get_backend_name(device));
+            QNN_LOG_INFO("%s backend setenv successfully\n", qnn::get_backend_name(device));
         } else {
-            QNN_LOG_ERROR("%s backend setenv failure\n",
-                          qnn::get_backend_name(device));
+            QNN_LOG_ERROR("%s backend setenv failure\n", qnn::get_backend_name(device));
         }
     }
 
     auto *instance = new qnn::qnn_instance(qnn_lib_path, g_qnn_mgr[device].lib, "");
     result = instance->qnn_init(nullptr);
     if (0 != result) {
-        QNN_LOG_WARN(
-            "init qnn subsystem failed with qnn backend %s, pls check why\n",
-            qnn::get_backend_name(device));
+        QNN_LOG_WARN("init qnn subsystem failed with qnn backend %s, pls check why\n", qnn::get_backend_name(device));
         delete instance;
         return nullptr;
     }
@@ -766,15 +719,14 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
     std::string device_name = qnn::get_backend_name(device);
     QNN_LOG_INFO("qnn device name %s", device_name.c_str());
 
-    g_qnn_mgr[device].instance = instance;
-    g_qnn_mgr[device].raw_interface = instance->get_qnn_raw_interface();
+    g_qnn_mgr[device].instance = instance;
+    g_qnn_mgr[device].raw_interface = instance->get_qnn_raw_interface();
     g_qnn_mgr[device].raw_system_interface = instance->get_qnn_raw_system_interface();
-    g_qnn_mgr[device].socinfo = instance->get_soc_info();
+    g_qnn_mgr[device].socinfo = instance->get_soc_info();
 
-    ggml_backend_t qnn_backend =
-        new ggml_backend{/* .guid = */ ggml_backend_qnn_guid(),
-                         /* .iface = */ ggml_backend_qnn_interface,
-                         /* .context = */ &g_qnn_mgr[device]};
+    ggml_backend_t qnn_backend = new ggml_backend{ /* .guid = */ ggml_backend_qnn_guid(),
+                                                   /* .iface = */ ggml_backend_qnn_interface,
+                                                   /* .context = */ &g_qnn_mgr[device] };
 
     g_qnn_mgr[device].backend = qnn_backend;
 
     return qnn_backend;
@@ -786,9 +738,8 @@ GGML_CALL int ggml_backend_qnn_reg_devices() {
     for (size_t idx = 0; idx < GGML_QNN_MAX_DEVICES; idx++) {
         char name[GGML_MAX_NAME];
         ggml_backend_qnn_get_device_description(idx, name, GGML_MAX_NAME);
-        ggml_backend_register(name, ggml_backend_qnn_reg_init,
-                              ggml_backend_qnn_buffer_type(idx),
-                              (void *) (intptr_t) idx);
+        ggml_backend_register(name, ggml_backend_qnn_reg_init, ggml_backend_qnn_buffer_type(idx),
+                              (void *)(intptr_t)idx);
     }
 
     return GGML_QNN_MAX_DEVICES;
diff --git a/ggml/src/ggml-qnn/utils.hpp b/ggml/src/ggml-qnn/utils.hpp
index 673fb90e6..2d830f678 100644
--- a/ggml/src/ggml-qnn/utils.hpp
+++ b/ggml/src/ggml-qnn/utils.hpp
@@ -30,7 +30,7 @@ Fn load_qnn_functionpointers(void *handle, const char *function_name) {
     return reinterpret_cast(dlsym(handle, function_name));
 }
 
-inline int validate_tensor_version(Qnn_Tensor_t tensor) {
+inline int validate_tensor_version(const Qnn_Tensor_t &tensor) {
     if (tensor.version != QNN_TENSOR_VERSION_1) {
         QNN_LOG_WARN("validate_tensor_version() tensor %s, got unsupported version %d\n", tensor.v1.name,
                      tensor.version);