format with clang-format

hongruichen 2024-07-15 10:30:57 +08:00
parent c46b4deea9
commit 4410fd6563


@@ -1,45 +1,44 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <stddef.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <math.h>
-#include <time.h>
-#include <unistd.h>
 #include <dlfcn.h>
 #include <fcntl.h>
-#include <sys/stat.h>
+#include <inttypes.h>
 #include <limits.h>
+#include <math.h>
 #include <signal.h>
-#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
 #include <sys/types.h>
-#include <string>
-#include <vector>
-#include <thread>
-#include <mutex>
-#include <map>
-#include <set>
-#include <tuple>
-#include <queue>
-#include <fstream>
-#include <iostream>
-#include <iomanip>
-#include <sstream>
-#include <chrono>
-#include <memory>
-#include <regex>
-#include <random>
-#include <functional>
-#include <unordered_map>
-#include <condition_variable>
+#include <time.h>
+#include <unistd.h>
 #include <cassert>
+#include <chrono>
+#include <condition_variable>
+#include <fstream>
+#include <functional>
+#include <iomanip>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <random>
+#include <regex>
+#include <set>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <tuple>
+#include <unordered_map>
 #include <unordered_set>
 #include <utility>
+#include <vector>
 #include "ggml.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 #include "ggml-qnn.h"
@@ -48,8 +47,8 @@
 #define GGML_QNN_LOGBUF_LEN 4096
 #define QNN_LOG_ERROR(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
-#define QNN_LOG_WARN(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
-#define QNN_LOG_INFO(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_WARN(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_INFO(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
 #if GGML_QNN_DEBUG
 #define QNN_LOG_DEBUG(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
@@ -57,11 +56,12 @@
 #define QNN_LOG_DEBUG(...)
 #endif
-static void tensor_dump(const ggml_tensor * tensor, const char * name);
+static void tensor_dump(const ggml_tensor *tensor, const char *name);
 #define TENSOR_DUMP(tensor) tensor_dump(tensor, #tensor)
-static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const char * func, int line, const char * format, ...) {
+static void ggml_qnn_log_internal(ggml_log_level level, const char *file, const char *func, int line,
+                                  const char *format, ...) {
     static std::mutex ggml_qnn_log_internal_mutex;
     static char s_ggml_qnn_log_internal_buf[GGML_QNN_LOGBUF_LEN];
@@ -78,7 +78,7 @@ static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const
     }
 }
-static const char * get_qnn_backend_name(int n_backend_type) {
+static const char *get_qnn_backend_name(int n_backend_type) {
     switch (n_backend_type) {
         case QNN_BACKEND_CPU:
             return "QNN-CPU";
@@ -93,13 +93,9 @@ static const char * get_qnn_backend_name(int n_backend_type) {
     }
 }
-static bool ggml_graph_compute_helper(
-        struct ggml_backend * backend,
-        struct ggml_cgraph * graph,
-        std::vector<uint8_t> & buf,
-        int n_threads,
-        ggml_abort_callback abort_callback,
-        void * abort_callback_data) {
+static bool ggml_graph_compute_helper(struct ggml_backend *backend, struct ggml_cgraph *graph,
+                                      std::vector<uint8_t> &buf, int n_threads, ggml_abort_callback abort_callback,
+                                      void *abort_callback_data) {
     struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
     plan.abort_callback = abort_callback;
@@ -141,12 +137,11 @@ static inline float ggml_compute_fp16_to_fp32(uint16_t h) {
 #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-static void tensor_dump(const ggml_tensor * tensor, const char * name) {
-    QNN_LOG_DEBUG("dump ggml tensor %s(%s): type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-          name, tensor->name,
-          tensor->type, ggml_type_name(tensor->type),
-          tensor->ne[0], tensor->ne[1], tensor->ne[2],
-          tensor->nb[0], tensor->nb[1], tensor->nb[2]);
+static void tensor_dump(const ggml_tensor *tensor, const char *name) {
+    QNN_LOG_DEBUG("dump ggml tensor %s(%s): type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                  ", nb = (%5zi, %5zi, %5zi)\n",
+                  name, tensor->name, tensor->type, ggml_type_name(tensor->type), tensor->ne[0], tensor->ne[1],
+                  tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]);
     float value = 0;
     std::ostringstream tmposs;
@@ -160,10 +155,8 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
             for (int i = 0; i < tensor->ne[2]; i++) {
                 for (int j = 0; j < tensor->ne[1]; j++) {
                     for (int k = 0; k < tensor->ne[0]; k++) {
-                        value = ((int8_t *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
-                                                          j * tensor->ne[0] + k];
-                        tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                               << " ";
+                        value = ((int8_t *)tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
+                        tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
                     }
                     tmposs << "\n";
                 }
@@ -181,10 +174,8 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
             for (int i = 0; i < tensor->ne[2]; i++) {
                 for (int j = 0; j < tensor->ne[1]; j++) {
                     for (int k = 0; k < tensor->ne[0]; k++) {
-                        value = ((float *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
-                                                         j * tensor->ne[0] + k];
-                        tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                               << " ";
+                        value = ((float *)tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
+                        tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
                     }
                     tmposs << "\n";
                 }
@@ -202,11 +193,11 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
             for (int i = 0; i < tensor->ne[2]; i++) {
                 for (int j = 0; j < tensor->ne[1]; j++) {
                     for (int k = 0; k < tensor->ne[0]; k++) {
-                        unsigned short tmpvalue = ((unsigned short *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
-                                                                                     j * tensor->ne[0] + k];
+                        unsigned short tmpvalue =
+                            ((unsigned short *)
+                                 tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
                         value = GGML_FP16_TO_FP32(tmpvalue);
-                        tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                               << " ";
+                        tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
                     }
                     tmposs << "\n";
                 }
@@ -220,15 +211,14 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     }
     if (tensor->type == GGML_TYPE_Q8_0) {
-        block_q8_0 * tmp = ((block_q8_0 *)tensor->data);
+        block_q8_0 *tmp = ((block_q8_0 *)tensor->data);
         for (int j = 0; j < tensor->ne[1]; j++) {
-            int n = tensor->ne[0] / QK8_0; //blocks per row
+            int n = tensor->ne[0] / QK8_0; // blocks per row
             for (int z = 0; z < n; z++) {
-                const float d = GGML_FP16_TO_FP32(tmp[ j * n + z ].d);
+                const float d = GGML_FP16_TO_FP32(tmp[j * n + z].d);
                 for (int k = 0; k < QK8_0; k++) {
                     value = tmp[j * n + z].qs[k] * d;
-                    tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                           << " ";
+                    tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
                 }
             }
             tmposs << "\n";
@@ -241,7 +231,7 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     }
 }
-static uint32_t get_tensor_rank(const ggml_tensor * tensor) {
+static uint32_t get_tensor_rank(const ggml_tensor *tensor) {
     uint32_t rank = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
         if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) {
@@ -251,7 +241,7 @@ static uint32_t get_tensor_rank(const ggml_tensor * tensor) {
     return rank;
 }
-static uint32_t get_tensor_data_size(const ggml_tensor * tensor) {
+static uint32_t get_tensor_data_size(const ggml_tensor *tensor) {
     size_t data_size = ggml_row_size(tensor->type, tensor->ne[0]);
     size_t n_dims = get_tensor_rank(tensor);
     for (int i = 1; i < n_dims; i++) {
@@ -264,8 +254,8 @@ static uint32_t get_tensor_data_size(const ggml_tensor * tensor) {
     return ggml_nbytes(tensor);
 }
-//ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L20
-static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
+// ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L20
+static void init_tensor_uniform(ggml_tensor *tensor, float min = -1.0f, float max = 1.0f) {
     size_t size = ggml_nelements(tensor);
     std::vector<float> data(size);
     for (size_t i = 0; i < size; i++) {
@@ -274,7 +264,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
     if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
 #ifdef GGML_USE_QNN
-        memcpy((char*)tensor->data, data.data(), size * sizeof(float));
+        memcpy((char *)tensor->data, data.data(), size * sizeof(float));
 #else
         ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
 #endif
@@ -282,25 +272,25 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
         GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
         std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
         std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
-        const float * im = imatrix.data();
+        const float *im = imatrix.data();
         if (!ggml_quantize_requires_imatrix(tensor->type)) {
             // when the imatrix is optional, we want to test both quantization with and without imatrix
             // use one of the random numbers to decide
-            if (data[0] > 0.5f*(min + max)) {
+            if (data[0] > 0.5f * (min + max)) {
                 im = nullptr;
             }
         }
-        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
+        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size / tensor->ne[0], tensor->ne[0], im);
         GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size()));
 #ifdef GGML_USE_QNN
-        memcpy((char*)tensor->data, dataq.data(), dataq.size());
+        memcpy((char *)tensor->data, dataq.data(), dataq.size());
 #else
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
 #endif
     } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
         // This is going to create some weird integers though.
 #ifdef GGML_USE_QNN
-        memcpy((char*)tensor->data, data.data(), ggml_nbytes(tensor));
+        memcpy((char *)tensor->data, data.data(), ggml_nbytes(tensor));
 #else
         ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
 #endif
@@ -309,32 +299,27 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
     }
 }
-//ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L310
-static void initialize_tensors(ggml_context * ctx) {
-    for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
+// ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L310
+static void initialize_tensors(ggml_context *ctx) {
+    for (ggml_tensor *t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
         init_tensor_uniform(t);
     }
 }
 static void show_usage() {
-    printf(" " \
-        "\nUsage: test_qnn_ops [options]\n" \
-        "\n" \
-        "Options:\n" \
-        " -t GGML_OP_ADD / GGML_OP_MULMAT\n" \
-        " -b 0(QNN_CPU) 1(QNN_GPU) 2(QNN_NPU) 3(ggml)\n" \
-        " ?/h print usage infomation\n\n"
-    );
+    printf(
+        " "
+        "\nUsage: test_qnn_ops [options]\n"
+        "\n"
+        "Options:\n"
+        " -t GGML_OP_ADD / GGML_OP_MULMAT\n"
+        " -b 0(QNN_CPU) 1(QNN_GPU) 2(QNN_NPU) 3(ggml)\n"
+        " ?/h print usage infomation\n\n");
 }
-typedef ggml_tensor * (*ggml_op_unary_t)(
-        ggml_context * ctx,
-        ggml_tensor * a);
+typedef ggml_tensor *(*ggml_op_unary_t)(ggml_context *ctx, ggml_tensor *a);
-typedef ggml_tensor * (*ggml_op_binary_t)(
-        ggml_context * ctx,
-        ggml_tensor * a,
-        ggml_tensor * b);
+typedef ggml_tensor *(*ggml_op_binary_t)(ggml_context *ctx, ggml_tensor *a, ggml_tensor *b);
 static constexpr const ggml_op_unary_t kUnaryOps[] = {
     nullptr, // GGML_OP_NONE
@@ -400,13 +385,13 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
     int sizey = 4;
     int sizex = 4;
-    struct ggml_context * ctx = nullptr;
-    struct ggml_cgraph * gf = nullptr;
-    struct ggml_tensor * src0 = nullptr;
-    struct ggml_tensor * src1 = nullptr;
-    struct ggml_tensor * dst = nullptr;
+    struct ggml_context *ctx = nullptr;
+    struct ggml_cgraph *gf = nullptr;
+    struct ggml_tensor *src0 = nullptr;
+    struct ggml_tensor *src1 = nullptr;
+    struct ggml_tensor *dst = nullptr;
     ggml_backend_t backend = nullptr;
-    ggml_backend_buffer_t buffer= nullptr;
+    ggml_backend_buffer_t buffer = nullptr;
     ggml_type qtype = GGML_TYPE_I8;
     qtype = GGML_TYPE_F16;
@@ -415,19 +400,16 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
     std::vector<uint8_t> work_buffer;
     QNN_LOG_DEBUG("enter qnn_ggml_op\n");
-    QNN_LOG_DEBUG("ggml op:%d(%s)\n", n_ggml_op_type, ggml_op_name((enum ggml_op) n_ggml_op_type));
+    QNN_LOG_DEBUG("ggml op:%d(%s)\n", n_ggml_op_type, ggml_op_name((enum ggml_op)n_ggml_op_type));
     n_begin_time = ggml_time_us();
     ctx_size += 1024 * 1024 * 32;
-    QNN_LOG_DEBUG("Allocating Memory of size %zi bytes, %zi MB\n", ctx_size,
-                  (ctx_size / 1024 / 1024));
+    QNN_LOG_DEBUG("Allocating Memory of size %zi bytes, %zi MB\n", ctx_size, (ctx_size / 1024 / 1024));
-    struct ggml_init_params params = {
-        /*.mem_size =*/ ctx_size,
-        /*.mem_buffer =*/ NULL,
-        /* no_alloc =*/ 0
-    };
+    struct ggml_init_params params = { /*.mem_size =*/ctx_size,
+                                       /*.mem_buffer =*/NULL,
+                                       /* no_alloc =*/0 };
     if (n_backend_type != QNN_BACKEND_GGML) {
         params.no_alloc = true;
@@ -470,8 +452,7 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
     } else if (binary_op) {
        dst = binary_op(ctx, src0, src1);
     } else {
-        QNN_LOG_WARN("ggml op %d(%s) not supported", n_ggml_op_type,
-                     ggml_op_name((enum ggml_op) n_ggml_op_type));
+        QNN_LOG_WARN("ggml op %d(%s) not supported", n_ggml_op_type, ggml_op_name((enum ggml_op)n_ggml_op_type));
         ggml_free(ctx);
         ggml_backend_free(backend);
         return 3;
@@ -504,17 +485,17 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
         TENSOR_DUMP(src1);
         TENSOR_DUMP(dst);
     } else {
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-                      src0->name,
-                      src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
-                      src0->nb[0], src0->nb[1], src0->nb[2]);
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      src0->name, src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
+                      src0->nb[0], src0->nb[1], src0->nb[2]);
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-                      src1->name,
-                      src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
-                      src1->nb[0], src1->nb[1], src1->nb[2]);
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      src1->name, src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
+                      src1->nb[0], src1->nb[1], src1->nb[2]);
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-                      dst->name,
-                      dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
-                      dst->nb[1], dst->nb[2]);
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
+                      dst->nb[1], dst->nb[2]);
     }
@@ -524,23 +505,19 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
     n_end_time = ggml_time_us();
     n_duration = (n_end_time - n_begin_time) / 1000;
-    QNN_LOG_DEBUG("duration of ut GGML_OP_%s using QNN backend %s: %lld milliseconds\n", ggml_op_name((enum ggml_op)n_ggml_op_type), get_qnn_backend_name(n_backend_type), n_duration);
+    QNN_LOG_DEBUG("duration of ut GGML_OP_%s using QNN backend %s: %lld milliseconds\n",
+                  ggml_op_name((enum ggml_op)n_ggml_op_type), get_qnn_backend_name(n_backend_type), n_duration);
     return 0;
 }
 #define DEFINE_OP(op) { #op, op }
 static const std::unordered_map<std::string, int> kMapStringToGGMLOp = {
-    DEFINE_OP(GGML_OP_ADD),
-    DEFINE_OP(GGML_OP_SUB),
-    DEFINE_OP(GGML_OP_MUL),
-    DEFINE_OP(GGML_OP_DIV),
-    DEFINE_OP(GGML_OP_SQRT),
-    DEFINE_OP(GGML_OP_MUL_MAT),
-    DEFINE_OP(GGML_OP_LOG),
+    DEFINE_OP(GGML_OP_ADD), DEFINE_OP(GGML_OP_SUB), DEFINE_OP(GGML_OP_MUL), DEFINE_OP(GGML_OP_DIV),
+    DEFINE_OP(GGML_OP_SQRT), DEFINE_OP(GGML_OP_MUL_MAT), DEFINE_OP(GGML_OP_LOG),
 };
-int main(int argc, char * argv[]) {
+int main(int argc, char *argv[]) {
     int num_threads = 4;
     int n_backend_type = QNN_BACKEND_CPU;
     int n_ggml_op_type = GGML_OP_ADD;
@@ -575,9 +552,9 @@ int main(int argc, char * argv[]) {
     }
     QNN_LOG_DEBUG("enter qnn_ggml_op\n");
-    QNN_LOG_DEBUG("backend %d, ggml op:%d(%s)", n_backend_type, n_ggml_op_type, ggml_op_name((enum ggml_op) n_ggml_op_type));
+    QNN_LOG_DEBUG("backend %d, ggml op:%d(%s)", n_backend_type, n_ggml_op_type,
+                  ggml_op_name((enum ggml_op)n_ggml_op_type));
     qnn_op_ut(num_threads, n_backend_type, n_ggml_op_type);
     return 0;
 }
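Note: the project's .clang-format file is not part of this commit, so the exact configuration is unknown. As a hedged illustration only, a minimal sketch of settings that would produce the changes visible above (alphabetically sorted includes, right-aligned pointers such as "const char *name", 4-space indentation, and long calls wrapping near 120 columns) might look like the following; the repository's real configuration may differ:

    # hypothetical .clang-format sketch -- not taken from this repository
    BasedOnStyle: LLVM       # assumed baseline style
    IndentWidth: 4           # matches the 4-space indentation in the reformatted code
    ColumnLimit: 120         # long QNN_LOG_DEBUG calls wrap near this width
    PointerAlignment: Right  # yields "ggml_tensor *tensor" rather than "ggml_tensor * tensor"
    SortIncludes: true       # reorders the C, C++, and project headers alphabetically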