diff --git a/tests/ggml-qnn/ggml-qnn-ut.cpp b/tests/ggml-qnn/ggml-qnn-ut.cpp
index f19a6355d..fefb26244 100644
--- a/tests/ggml-qnn/ggml-qnn-ut.cpp
+++ b/tests/ggml-qnn/ggml-qnn-ut.cpp
@@ -1,67 +1,67 @@
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
 #include
-#include
+#include
 #include
+#include
 #include
-#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
+#include
+#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
+#include
 #include "ggml.h"
+
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 #include "ggml-qnn.h"
 
-#define GGML_QNN_DEBUG 1
+#define GGML_QNN_DEBUG      1
 #define GGML_QNN_LOGBUF_LEN 4096
 
-#define QNN_LOG_ERROR(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
-#define QNN_LOG_WARN(...)  ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
-#define QNN_LOG_INFO(...)  ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_ERROR(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_WARN(...)  ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_INFO(...)  ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
 
 #if GGML_QNN_DEBUG
-#define QNN_LOG_DEBUG(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_DEBUG(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
 #else
 #define QNN_LOG_DEBUG(...)
 #endif
 
-static void tensor_dump(const ggml_tensor * tensor, const char * name);
+static void tensor_dump(const ggml_tensor *tensor, const char *name);
 
 #define TENSOR_DUMP(tensor) tensor_dump(tensor, #tensor)
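The hunk below only reflows the signature of `ggml_qnn_log_internal` and shows the first two lines of its body. For context, a minimal sketch of this kind of mutex-guarded, fixed-buffer variadic logger (assuming `<mutex>`, `<cstdarg>` and `<cstdio>` are available; this is an illustration of the pattern, not the verbatim body):

```cpp
// Illustrative sketch only: serialize concurrent callers, format a
// "[file, func, line]" prefix, then append the caller's printf-style
// payload into one fixed-size buffer before emitting it.
static void example_log(const char *file, const char *func, int line, const char *format, ...) {
    static std::mutex log_mutex;
    static char       log_buf[GGML_QNN_LOGBUF_LEN];

    std::lock_guard<std::mutex> lock(log_mutex);
    int len = snprintf(log_buf, sizeof(log_buf), "[%s, %s, %d]: ", file, func, line);
    if (len < 0 || len >= (int)sizeof(log_buf)) {
        return; // prefix alone overflowed the buffer
    }

    va_list args;
    va_start(args, format);
    vsnprintf(log_buf + len, sizeof(log_buf) - len, format, args);
    va_end(args);

    printf("%s\n", log_buf);
}
```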
-static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const char * func, int line, const char * format, ...) {
+static void ggml_qnn_log_internal(ggml_log_level level, const char *file, const char *func, int line,
+                                  const char *format, ...) {
     static std::mutex ggml_qnn_log_internal_mutex;
     static char s_ggml_qnn_log_internal_buf[GGML_QNN_LOGBUF_LEN];
 
@@ -78,7 +78,7 @@ static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const
     }
 }
 
-static const char * get_qnn_backend_name(int n_backend_type) {
+static const char *get_qnn_backend_name(int n_backend_type) {
     switch (n_backend_type) {
         case QNN_BACKEND_CPU:
             return "QNN-CPU";
@@ -93,13 +93,9 @@ static const char * get_qnn_backend_name(int n_backend_type) {
     }
 }
 
-static bool ggml_graph_compute_helper(
-        struct ggml_backend * backend,
-        struct ggml_cgraph * graph,
-        std::vector<uint8_t> & buf,
-        int n_threads,
-        ggml_abort_callback abort_callback,
-        void * abort_callback_data) {
+static bool ggml_graph_compute_helper(struct ggml_backend *backend, struct ggml_cgraph *graph,
+                                      std::vector<uint8_t> &buf, int n_threads, ggml_abort_callback abort_callback,
+                                      void *abort_callback_data) {
     struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
 
     plan.abort_callback = abort_callback;
@@ -129,8 +125,8 @@ static bool ggml_graph_compute_helper(
 #define QK8_0 32
 typedef struct {
-    uint16_t d; // delta
-    int8_t qs[QK8_0]; // quants
+    uint16_t d;       // delta
+    int8_t qs[QK8_0]; // quants
 } block_q8_0;
 
 static inline float ggml_compute_fp16_to_fp32(uint16_t h) {
@@ -141,12 +137,11 @@ static inline float ggml_compute_fp16_to_fp32(uint16_t h) {
 
 #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
 
-static void tensor_dump(const ggml_tensor * tensor, const char * name) {
-    QNN_LOG_DEBUG("dump ggml tensor %s(%s): type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-          name, tensor->name,
-          tensor->type, ggml_type_name(tensor->type),
-          tensor->ne[0], tensor->ne[1], tensor->ne[2],
-          tensor->nb[0], tensor->nb[1], tensor->nb[2]);
+static void tensor_dump(const ggml_tensor *tensor, const char *name) {
+    QNN_LOG_DEBUG("dump ggml tensor %s(%s): type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                  ", nb = (%5zi, %5zi, %5zi)\n",
+                  name, tensor->name, tensor->type, ggml_type_name(tensor->type), tensor->ne[0], tensor->ne[1],
+                  tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]);
 
     float value = 0;
     std::ostringstream tmposs;
@@ -160,10 +155,8 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     for (int i = 0; i < tensor->ne[2]; i++) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
-                value = ((int8_t *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
-                        j * tensor->ne[0] + k];
-                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                       << " ";
+                value = ((int8_t *)tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
+                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
             }
             tmposs << "\n";
         }
@@ -181,10 +174,8 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     for (int i = 0; i < tensor->ne[2]; i++) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
-                value = ((float *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
-                        j * tensor->ne[0] + k];
-                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                       << " ";
+                value = ((float *)tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
+                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
             }
             tmposs << "\n";
         }
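The middle of `ggml_graph_compute_helper` is elided between the hunks above. The usual shape of this helper in llama.cpp's tests is to back the plan's scratch request with the caller's vector and then compute; a sketch under that assumption (status-enum names may vary across ggml revisions):

```cpp
// Sketch of the elided middle of ggml_graph_compute_helper, using the
// declarations shown above (plan, buf, backend, graph). Illustrative,
// not the verbatim body.
if (plan.work_size > 0) {
    buf.resize(plan.work_size); // scratch memory requested by the plan
    plan.work_data = buf.data();
}

if (backend != nullptr) {
    // compute through the (QNN) backend when one was created
    return ggml_backend_graph_compute(backend, graph) == GGML_STATUS_SUCCESS;
}
// otherwise run the default CPU path with the prepared plan
return ggml_graph_compute(graph, &plan) == GGML_STATUS_SUCCESS;
```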
@@ -202,11 +193,11 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     for (int i = 0; i < tensor->ne[2]; i++) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
-                unsigned short tmpvalue = ((unsigned short *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
-                        j * tensor->ne[0] + k];
+                unsigned short tmpvalue =
+                    ((unsigned short *)
+                         tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
                 value = GGML_FP16_TO_FP32(tmpvalue);
-                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                       << " ";
+                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
             }
             tmposs << "\n";
         }
@@ -220,15 +211,14 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     }
 
     if (tensor->type == GGML_TYPE_Q8_0) {
-        block_q8_0 * tmp = ((block_q8_0 *)tensor->data);
-        for (int j = 0; j < tensor->ne[1]; j++) {
-            int n = tensor->ne[0] / QK8_0; //blocks per row
+        block_q8_0 *tmp = ((block_q8_0 *)tensor->data);
+        for (int j = 0; j < tensor->ne[1]; j++) {
+            int n = tensor->ne[0] / QK8_0; // blocks per row
             for (int z = 0; z < n; z++) {
-                const float d = GGML_FP16_TO_FP32(tmp[ j * n + z ].d);
+                const float d = GGML_FP16_TO_FP32(tmp[j * n + z].d);
                 for (int k = 0; k < QK8_0; k++) {
                     value = tmp[j * n + z].qs[k] * d;
-                    tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                           << " ";
+                    tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
                 }
             }
             tmposs << "\n";
@@ -241,7 +231,7 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     }
 }
 
-static uint32_t get_tensor_rank(const ggml_tensor * tensor) {
+static uint32_t get_tensor_rank(const ggml_tensor *tensor) {
     uint32_t rank = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
         if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) {
@@ -251,7 +241,7 @@ static uint32_t get_tensor_rank(const ggml_tensor * tensor) {
     return rank;
 }
 
-static uint32_t get_tensor_data_size(const ggml_tensor * tensor) {
+static uint32_t get_tensor_data_size(const ggml_tensor *tensor) {
     size_t data_size = ggml_row_size(tensor->type, tensor->ne[0]);
     size_t n_dims = get_tensor_rank(tensor);
     for (int i = 1; i < n_dims; i++) {
@@ -264,8 +254,8 @@ static uint32_t get_tensor_data_size(const ggml_tensor * tensor) {
     return ggml_nbytes(tensor);
 }
 
-//ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L20
-static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
+// ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L20
+static void init_tensor_uniform(ggml_tensor *tensor, float min = -1.0f, float max = 1.0f) {
     size_t size = ggml_nelements(tensor);
     std::vector<float> data(size);
     for (size_t i = 0; i < size; i++) {
@@ -274,7 +264,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
 
     if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
 #ifdef GGML_USE_QNN
-        memcpy((char*)tensor->data, data.data(), size * sizeof(float));
+        memcpy((char *)tensor->data, data.data(), size * sizeof(float));
 #else
         ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
 #endif
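The GGML_TYPE_Q8_0 branch of `tensor_dump` above reconstructs each element as `qs[k] * d`, where `d` is the per-block fp16 scale. Pulled out of the dump loop, the same dequantization reads as follows (self-contained sketch; `block_q8_0`, `QK8_0` and `GGML_FP16_TO_FP32` are the declarations from this file):

```cpp
// Dequantize one row of Q8_0 data: every block of QK8_0 (= 32) int8
// quants shares a single fp16 scale d, so value = qs[k] * d.
static void dequantize_row_q8_0_example(const block_q8_0 *blocks, float *out, int ne0) {
    const int n_blocks = ne0 / QK8_0; // blocks per row
    for (int z = 0; z < n_blocks; z++) {
        const float d = GGML_FP16_TO_FP32(blocks[z].d);
        for (int k = 0; k < QK8_0; k++) {
            out[z * QK8_0 + k] = blocks[z].qs[k] * d;
        }
    }
}
```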
@@ -282,25 +272,25 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
         GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
         std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
         std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
-        const float * im = imatrix.data();
+        const float *im = imatrix.data();
         if (!ggml_quantize_requires_imatrix(tensor->type)) {
             // when the imatrix is optional, we want to test both quantization with and without imatrix
             // use one of the random numbers to decide
-            if (data[0] > 0.5f*(min + max)) {
+            if (data[0] > 0.5f * (min + max)) {
                 im = nullptr;
             }
         }
-        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
+        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size / tensor->ne[0], tensor->ne[0], im);
         GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size()));
 #ifdef GGML_USE_QNN
-        memcpy((char*)tensor->data, dataq.data(), dataq.size());
+        memcpy((char *)tensor->data, dataq.data(), dataq.size());
 #else
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
 #endif
     } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
         // This is going to create some weird integers though.
 #ifdef GGML_USE_QNN
-        memcpy((char*)tensor->data, data.data(), ggml_nbytes(tensor));
+        memcpy((char *)tensor->data, data.data(), ggml_nbytes(tensor));
 #else
         ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
 #endif
@@ -309,125 +299,117 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
     }
 }
 
-//ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L310
-static void initialize_tensors(ggml_context * ctx) {
-    for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
+// ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L310
+static void initialize_tensors(ggml_context *ctx) {
+    for (ggml_tensor *t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
         init_tensor_uniform(t);
     }
 }
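The actual random fill in `init_tensor_uniform` is elided by the hunks above; the test-backend-ops.cpp file it cites draws from a uniform distribution, so a plausible stand-in (the generator and seed here are illustrative, not taken from the source) is:

```cpp
// Hypothetical helper matching init_tensor_uniform's [min, max] contract;
// requires <random>. A fixed seed keeps test runs reproducible.
static void fill_uniform_example(float *data, size_t size, float min, float max) {
    static std::default_random_engine rng(1234);
    std::uniform_real_distribution<float> dist(min, max);
    for (size_t i = 0; i < size; i++) {
        data[i] = dist(rng);
    }
}
```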
 
 static void show_usage() {
-    printf(" " \
-        "\nUsage: test_qnn_ops [options]\n" \
-        "\n" \
-        "Options:\n" \
-        " -t GGML_OP_ADD / GGML_OP_MULMAT\n" \
-        " -b 0(QNN_CPU) 1(QNN_GPU) 2(QNN_NPU) 3(ggml)\n" \
-        " ?/h print usage infomation\n\n"
-    );
+    printf(
+        " "
+        "\nUsage: test_qnn_ops [options]\n"
+        "\n"
+        "Options:\n"
+        " -t GGML_OP_ADD / GGML_OP_MULMAT\n"
+        " -b 0(QNN_CPU) 1(QNN_GPU) 2(QNN_NPU) 3(ggml)\n"
+        " ?/h print usage information\n\n");
 }
 
-typedef ggml_tensor * (*ggml_op_unary_t)(
-        ggml_context * ctx,
-        ggml_tensor * a);
+typedef ggml_tensor *(*ggml_op_unary_t)(ggml_context *ctx, ggml_tensor *a);
 
-typedef ggml_tensor * (*ggml_op_binary_t)(
-        ggml_context * ctx,
-        ggml_tensor * a,
-        ggml_tensor * b);
+typedef ggml_tensor *(*ggml_op_binary_t)(ggml_context *ctx, ggml_tensor *a, ggml_tensor *b);
 
 static constexpr const ggml_op_unary_t kUnaryOps[] = {
-    nullptr, // GGML_OP_NONE
-    nullptr, // GGML_OP_DUP
-    nullptr, // GGML_OP_ADD
-    nullptr, // GGML_OP_ADD1
-    nullptr, // GGML_OP_ACC
-    nullptr, // GGML_OP_SUB
-    nullptr, // GGML_OP_MUL
-    nullptr, // GGML_OP_DIV
-    nullptr, // GGML_OP_SQR
-    ggml_sqrt, // GGML_OP_SQRT
-    ggml_log, // GGML_OP_LOG
-    nullptr, // GGML_OP_SUM
-    nullptr, // GGML_OP_SUM_ROWS
-    nullptr, // GGML_OP_MEAN
-    nullptr, // GGML_OP_ARGMAX
-    nullptr, // GGML_OP_REPEAT
-    nullptr, // GGML_OP_REPEAT_BACK
-    nullptr, // GGML_OP_CONCAT
-    nullptr, // GGML_OP_SILU_BACK
-    nullptr, // GGML_OP_NORM
-    nullptr, // GGML_OP_RMS_NORM
-    nullptr, // GGML_OP_RMS_NORM_BACK
-    nullptr, // GGML_OP_GROUP_NORM
-    nullptr, // GGML_OP_MUL_MAT
+    nullptr,   // GGML_OP_NONE
+    nullptr,   // GGML_OP_DUP
+    nullptr,   // GGML_OP_ADD
+    nullptr,   // GGML_OP_ADD1
+    nullptr,   // GGML_OP_ACC
+    nullptr,   // GGML_OP_SUB
+    nullptr,   // GGML_OP_MUL
+    nullptr,   // GGML_OP_DIV
+    nullptr,   // GGML_OP_SQR
+    ggml_sqrt, // GGML_OP_SQRT
+    ggml_log,  // GGML_OP_LOG
+    nullptr,   // GGML_OP_SUM
+    nullptr,   // GGML_OP_SUM_ROWS
+    nullptr,   // GGML_OP_MEAN
+    nullptr,   // GGML_OP_ARGMAX
+    nullptr,   // GGML_OP_REPEAT
+    nullptr,   // GGML_OP_REPEAT_BACK
+    nullptr,   // GGML_OP_CONCAT
+    nullptr,   // GGML_OP_SILU_BACK
+    nullptr,   // GGML_OP_NORM
+    nullptr,   // GGML_OP_RMS_NORM
+    nullptr,   // GGML_OP_RMS_NORM_BACK
+    nullptr,   // GGML_OP_GROUP_NORM
+    nullptr,   // GGML_OP_MUL_MAT
 };
 
 static constexpr const ggml_op_binary_t kBinaryOps[] = {
-    nullptr, // GGML_OP_NONE
-    nullptr, // GGML_OP_DUP
-    ggml_add, // GGML_OP_ADD
-    nullptr, // GGML_OP_ADD1
-    nullptr, // GGML_OP_ACC
-    ggml_sub, // GGML_OP_SUB
-    ggml_mul, // GGML_OP_MUL
-    ggml_div, // GGML_OP_DIV
-    nullptr, // GGML_OP_SQR
-    nullptr, // GGML_OP_SQRT
-    nullptr, // GGML_OP_LOG
-    nullptr, // GGML_OP_SUM
-    nullptr, // GGML_OP_SUM_ROWS
-    nullptr, // GGML_OP_MEAN
-    nullptr, // GGML_OP_ARGMAX
-    nullptr, // GGML_OP_REPEAT
-    nullptr, // GGML_OP_REPEAT_BACK
-    nullptr, // GGML_OP_CONCAT
-    nullptr, // GGML_OP_SILU_BACK
-    nullptr, // GGML_OP_NORM
-    nullptr, // GGML_OP_RMS_NORM
-    nullptr, // GGML_OP_RMS_NORM_BACK
-    nullptr, // GGML_OP_GROUP_NORM
-    ggml_mul_mat, // GGML_OP_MUL_MAT
+    nullptr,      // GGML_OP_NONE
+    nullptr,      // GGML_OP_DUP
+    ggml_add,     // GGML_OP_ADD
+    nullptr,      // GGML_OP_ADD1
+    nullptr,      // GGML_OP_ACC
+    ggml_sub,     // GGML_OP_SUB
+    ggml_mul,     // GGML_OP_MUL
+    ggml_div,     // GGML_OP_DIV
+    nullptr,      // GGML_OP_SQR
+    nullptr,      // GGML_OP_SQRT
+    nullptr,      // GGML_OP_LOG
+    nullptr,      // GGML_OP_SUM
+    nullptr,      // GGML_OP_SUM_ROWS
+    nullptr,      // GGML_OP_MEAN
+    nullptr,      // GGML_OP_ARGMAX
+    nullptr,      // GGML_OP_REPEAT
+    nullptr,      // GGML_OP_REPEAT_BACK
+    nullptr,      // GGML_OP_CONCAT
+    nullptr,      // GGML_OP_SILU_BACK
+    nullptr,      // GGML_OP_NORM
+    nullptr,      // GGML_OP_RMS_NORM
+    nullptr,      // GGML_OP_RMS_NORM_BACK
+    nullptr,      // GGML_OP_GROUP_NORM
+    ggml_mul_mat, // GGML_OP_MUL_MAT
 };
 
 static_assert(kBinaryOps[GGML_OP_MUL_MAT] == ggml_mul_mat, "ggml_mul_mat at wrong index, check kBinaryOps");
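The static_assert above pins `ggml_mul_mat` to the GGML_OP_MUL_MAT slot. The way `qnn_op_ut` consumes these tables (visible in fragments further down) amounts to the lookup below; this standalone sketch adds an explicit bounds check that the original leaves implicit:

```cpp
// Dispatch sketch over the two op tables above: try the unary table
// first, then the binary one; a null entry means "not wired up".
static ggml_tensor *dispatch_op_example(ggml_context *ctx, int op, ggml_tensor *a, ggml_tensor *b) {
    constexpr int n_ops = sizeof(kBinaryOps) / sizeof(kBinaryOps[0]); // same length as kUnaryOps
    if (op < 0 || op >= n_ops) {
        return nullptr; // op index outside both tables
    }
    if (ggml_op_unary_t unary_op = kUnaryOps[op]) {
        return unary_op(ctx, a); // e.g. GGML_OP_SQRT, GGML_OP_LOG
    }
    if (ggml_op_binary_t binary_op = kBinaryOps[op]) {
        return binary_op(ctx, a, b); // e.g. GGML_OP_ADD, GGML_OP_MUL_MAT
    }
    return nullptr; // listed but unsupported
}
```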
 
 static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
-    int64_t n_begin_time = 0LL;
-    int64_t n_end_time = 0LL;
-    int64_t n_duration = 0LL;
-    size_t ctx_size = 0;
-    int sizey = 4;
-    int sizex = 4;
+    int64_t n_begin_time = 0LL;
+    int64_t n_end_time   = 0LL;
+    int64_t n_duration   = 0LL;
+    size_t  ctx_size     = 0;
+    int     sizey        = 4;
+    int     sizex        = 4;
 
-    struct ggml_context * ctx = nullptr;
-    struct ggml_cgraph * gf = nullptr;
-    struct ggml_tensor * src0 = nullptr;
-    struct ggml_tensor * src1 = nullptr;
-    struct ggml_tensor * dst = nullptr;
-    ggml_backend_t backend = nullptr;
-    ggml_backend_buffer_t buffer= nullptr;
+    struct ggml_context  *ctx  = nullptr;
+    struct ggml_cgraph   *gf   = nullptr;
+    struct ggml_tensor   *src0 = nullptr;
+    struct ggml_tensor   *src1 = nullptr;
+    struct ggml_tensor   *dst  = nullptr;
+    ggml_backend_t        backend = nullptr;
+    ggml_backend_buffer_t buffer  = nullptr;
 
-    ggml_type qtype = GGML_TYPE_I8;
-    qtype = GGML_TYPE_F16;
-    qtype = GGML_TYPE_Q8_0;
-    qtype = GGML_TYPE_F32;
+    ggml_type qtype = GGML_TYPE_I8;
+    qtype           = GGML_TYPE_F16;
+    qtype           = GGML_TYPE_Q8_0;
+    qtype           = GGML_TYPE_F32;
 
     std::vector<uint8_t> work_buffer;
 
     QNN_LOG_DEBUG("enter qnn_ggml_op\n");
-    QNN_LOG_DEBUG("ggml op:%d(%s)\n", n_ggml_op_type, ggml_op_name((enum ggml_op) n_ggml_op_type));
+    QNN_LOG_DEBUG("ggml op:%d(%s)\n", n_ggml_op_type, ggml_op_name((enum ggml_op)n_ggml_op_type));
 
     n_begin_time = ggml_time_us();
 
     ctx_size += 1024 * 1024 * 32;
-    QNN_LOG_DEBUG("Allocating Memory of size %zi bytes, %zi MB\n", ctx_size,
-                  (ctx_size / 1024 / 1024));
+    QNN_LOG_DEBUG("Allocating Memory of size %zi bytes, %zi MB\n", ctx_size, (ctx_size / 1024 / 1024));
 
-    struct ggml_init_params params = {
-        /*.mem_size =*/ ctx_size,
-        /*.mem_buffer =*/ NULL,
-        /* no_alloc =*/ 0
-    };
+    struct ggml_init_params params = { /*.mem_size =*/ctx_size,
+                                       /*.mem_buffer =*/NULL,
+                                       /* no_alloc =*/0 };
 
     if (n_backend_type != QNN_BACKEND_GGML) {
         params.no_alloc = true;
@@ -470,8 +452,7 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
     } else if (binary_op) {
         dst = binary_op(ctx, src0, src1);
     } else {
-        QNN_LOG_WARN("ggml op %d(%s) not supported", n_ggml_op_type,
-                     ggml_op_name((enum ggml_op) n_ggml_op_type));
+        QNN_LOG_WARN("ggml op %d(%s) not supported", n_ggml_op_type, ggml_op_name((enum ggml_op)n_ggml_op_type));
         ggml_free(ctx);
         ggml_backend_free(backend);
         return 3;
@@ -504,17 +485,17 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
         TENSOR_DUMP(src1);
         TENSOR_DUMP(dst);
     } else {
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-              src0->name,
-              src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      src0->name, src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
                       src0->nb[0], src0->nb[1], src0->nb[2]);
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-              src1->name,
-              src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      src1->name, src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
                       src1->nb[0], src1->nb[1], src1->nb[2]);
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-              dst->name,
-              dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
                       dst->nb[1], dst->nb[2]);
     }
 
@@ -524,26 +505,22 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
     n_end_time = ggml_time_us();
     n_duration = (n_end_time - n_begin_time) / 1000;
-    QNN_LOG_DEBUG("duration of ut GGML_OP_%s using QNN backend %s: %lld milliseconds\n", ggml_op_name((enum ggml_op)n_ggml_op_type), get_qnn_backend_name(n_backend_type), n_duration);
+    QNN_LOG_DEBUG("duration of ut GGML_OP_%s using QNN backend %s: %lld milliseconds\n",
+                  ggml_op_name((enum ggml_op)n_ggml_op_type), get_qnn_backend_name(n_backend_type), n_duration);
     return 0;
 }
 
 #define DEFINE_OP(op) { #op, op }
 
 static const std::unordered_map<std::string, int> kMapStringToGGMLOp = {
-    DEFINE_OP(GGML_OP_ADD),
-    DEFINE_OP(GGML_OP_SUB),
-    DEFINE_OP(GGML_OP_MUL),
-    DEFINE_OP(GGML_OP_DIV),
-    DEFINE_OP(GGML_OP_SQRT),
-    DEFINE_OP(GGML_OP_MUL_MAT),
-    DEFINE_OP(GGML_OP_LOG),
+    DEFINE_OP(GGML_OP_ADD),  DEFINE_OP(GGML_OP_SUB),     DEFINE_OP(GGML_OP_MUL), DEFINE_OP(GGML_OP_DIV),
+    DEFINE_OP(GGML_OP_SQRT), DEFINE_OP(GGML_OP_MUL_MAT), DEFINE_OP(GGML_OP_LOG),
};
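The body of the "-t" branch in main is elided in the hunks below; given kMapStringToGGMLOp above, the lookup presumably takes this shape (the error handling is a guess, mirroring the "-b" branch that is shown):

```cpp
// Plausible reconstruction of the elided "-t" handling in main(): map the
// op name string (e.g. "GGML_OP_ADD") to its enum value via the table above.
if (i + 1 < argc) {
    auto it = kMapStringToGGMLOp.find(argv[i + 1]);
    if (it != kMapStringToGGMLOp.end()) {
        n_ggml_op_type = it->second;
        i++; // consume the option's argument
    } else {
        show_usage();
        return 1;
    }
}
```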
 
-int main(int argc, char * argv[]) {
-    int num_threads = 4;
-    int n_backend_type = QNN_BACKEND_CPU;
-    int n_ggml_op_type = GGML_OP_ADD;
+int main(int argc, char *argv[]) {
+    int num_threads    = 4;
+    int n_backend_type = QNN_BACKEND_CPU;
+    int n_ggml_op_type = GGML_OP_ADD;
 
     for (int i = 1; i < argc; i++) {
         if (0 == strcmp(argv[i], "-t")) {
@@ -561,7 +538,7 @@ int main(int argc, char * argv[]) {
             if (i + 1 < argc) {
                 int backend = atoi(argv[i + 1]);
                 if (backend <= QNN_BACKEND_GGML)
-                   n_backend_type = backend;
+                    n_backend_type = backend;
                 else {
                     show_usage();
                     return 1;
@@ -575,9 +552,9 @@ int main(int argc, char * argv[]) {
     }
 
     QNN_LOG_DEBUG("enter qnn_ggml_op\n");
-    QNN_LOG_DEBUG("backend %d, ggml op:%d(%s)", n_backend_type, n_ggml_op_type, ggml_op_name((enum ggml_op) n_ggml_op_type));
+    QNN_LOG_DEBUG("backend %d, ggml op:%d(%s)", n_backend_type, n_ggml_op_type,
+                  ggml_op_name((enum ggml_op)n_ggml_op_type));
 
     qnn_op_ut(num_threads, n_backend_type, n_ggml_op_type);
-
     return 0;
 }
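For reference, given the option list printed by show_usage, a typical invocation of the rebuilt binary would be `test_qnn_ops -t GGML_OP_MUL_MAT -b 2` (run MUL_MAT on the QNN NPU backend), while `-b 3` exercises the plain ggml path for comparison.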