format with clang-format
This commit is contained in:
parent
c46b4deea9
commit
4410fd6563
1 changed files with 176 additions and 199 deletions
|
@ -1,45 +1,44 @@
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <sys/stat.h>
|
#include <inttypes.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
#include <math.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <fcntl.h>
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include <thread>
|
|
||||||
#include <mutex>
|
|
||||||
#include <map>
|
|
||||||
#include <set>
|
|
||||||
#include <tuple>
|
|
||||||
#include <queue>
|
|
||||||
#include <fstream>
|
|
||||||
#include <iostream>
|
|
||||||
#include <iomanip>
|
|
||||||
#include <sstream>
|
|
||||||
#include <chrono>
|
|
||||||
#include <memory>
|
|
||||||
#include <regex>
|
|
||||||
#include <random>
|
|
||||||
#include <functional>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <condition_variable>
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#include <chrono>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <fstream>
|
||||||
|
#include <functional>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
|
#include <queue>
|
||||||
|
#include <random>
|
||||||
|
#include <regex>
|
||||||
|
#include <set>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <thread>
|
||||||
|
#include <tuple>
|
||||||
|
#include <unordered_map>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
|
|
||||||
#include "ggml-alloc.h"
|
#include "ggml-alloc.h"
|
||||||
#include "ggml-backend.h"
|
#include "ggml-backend.h"
|
||||||
#include "ggml-qnn.h"
|
#include "ggml-qnn.h"
|
||||||
|
@ -61,7 +60,8 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name);
|
||||||
|
|
||||||
#define TENSOR_DUMP(tensor) tensor_dump(tensor, #tensor)
|
#define TENSOR_DUMP(tensor) tensor_dump(tensor, #tensor)
|
||||||
|
|
||||||
static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const char * func, int line, const char * format, ...) {
|
static void ggml_qnn_log_internal(ggml_log_level level, const char *file, const char *func, int line,
|
||||||
|
const char *format, ...) {
|
||||||
static std::mutex ggml_qnn_log_internal_mutex;
|
static std::mutex ggml_qnn_log_internal_mutex;
|
||||||
static char s_ggml_qnn_log_internal_buf[GGML_QNN_LOGBUF_LEN];
|
static char s_ggml_qnn_log_internal_buf[GGML_QNN_LOGBUF_LEN];
|
||||||
|
|
||||||
|
@ -93,12 +93,8 @@ static const char * get_qnn_backend_name(int n_backend_type) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ggml_graph_compute_helper(
|
static bool ggml_graph_compute_helper(struct ggml_backend *backend, struct ggml_cgraph *graph,
|
||||||
struct ggml_backend * backend,
|
std::vector<uint8_t> &buf, int n_threads, ggml_abort_callback abort_callback,
|
||||||
struct ggml_cgraph * graph,
|
|
||||||
std::vector<uint8_t> & buf,
|
|
||||||
int n_threads,
|
|
||||||
ggml_abort_callback abort_callback,
|
|
||||||
void *abort_callback_data) {
|
void *abort_callback_data) {
|
||||||
struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
|
struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
|
||||||
|
|
||||||
|
@ -142,11 +138,10 @@ static inline float ggml_compute_fp16_to_fp32(uint16_t h) {
|
||||||
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
|
|
||||||
static void tensor_dump(const ggml_tensor *tensor, const char *name) {
|
static void tensor_dump(const ggml_tensor *tensor, const char *name) {
|
||||||
QNN_LOG_DEBUG("dump ggml tensor %s(%s): type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
|
QNN_LOG_DEBUG("dump ggml tensor %s(%s): type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
|
||||||
name, tensor->name,
|
", nb = (%5zi, %5zi, %5zi)\n",
|
||||||
tensor->type, ggml_type_name(tensor->type),
|
name, tensor->name, tensor->type, ggml_type_name(tensor->type), tensor->ne[0], tensor->ne[1],
|
||||||
tensor->ne[0], tensor->ne[1], tensor->ne[2],
|
tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]);
|
||||||
tensor->nb[0], tensor->nb[1], tensor->nb[2]);
|
|
||||||
|
|
||||||
float value = 0;
|
float value = 0;
|
||||||
std::ostringstream tmposs;
|
std::ostringstream tmposs;
|
||||||
|
@ -160,10 +155,8 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
|
||||||
for (int i = 0; i < tensor->ne[2]; i++) {
|
for (int i = 0; i < tensor->ne[2]; i++) {
|
||||||
for (int j = 0; j < tensor->ne[1]; j++) {
|
for (int j = 0; j < tensor->ne[1]; j++) {
|
||||||
for (int k = 0; k < tensor->ne[0]; k++) {
|
for (int k = 0; k < tensor->ne[0]; k++) {
|
||||||
value = ((int8_t *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
|
value = ((int8_t *)tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
|
||||||
j * tensor->ne[0] + k];
|
tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
|
||||||
tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
|
|
||||||
<< " ";
|
|
||||||
}
|
}
|
||||||
tmposs << "\n";
|
tmposs << "\n";
|
||||||
}
|
}
|
||||||
|
@ -181,10 +174,8 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
|
||||||
for (int i = 0; i < tensor->ne[2]; i++) {
|
for (int i = 0; i < tensor->ne[2]; i++) {
|
||||||
for (int j = 0; j < tensor->ne[1]; j++) {
|
for (int j = 0; j < tensor->ne[1]; j++) {
|
||||||
for (int k = 0; k < tensor->ne[0]; k++) {
|
for (int k = 0; k < tensor->ne[0]; k++) {
|
||||||
value = ((float *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
|
value = ((float *)tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
|
||||||
j * tensor->ne[0] + k];
|
tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
|
||||||
tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
|
|
||||||
<< " ";
|
|
||||||
}
|
}
|
||||||
tmposs << "\n";
|
tmposs << "\n";
|
||||||
}
|
}
|
||||||
|
@ -202,11 +193,11 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
|
||||||
for (int i = 0; i < tensor->ne[2]; i++) {
|
for (int i = 0; i < tensor->ne[2]; i++) {
|
||||||
for (int j = 0; j < tensor->ne[1]; j++) {
|
for (int j = 0; j < tensor->ne[1]; j++) {
|
||||||
for (int k = 0; k < tensor->ne[0]; k++) {
|
for (int k = 0; k < tensor->ne[0]; k++) {
|
||||||
unsigned short tmpvalue = ((unsigned short *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
|
unsigned short tmpvalue =
|
||||||
j * tensor->ne[0] + k];
|
((unsigned short *)
|
||||||
|
tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
|
||||||
value = GGML_FP16_TO_FP32(tmpvalue);
|
value = GGML_FP16_TO_FP32(tmpvalue);
|
||||||
tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
|
tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
|
||||||
<< " ";
|
|
||||||
}
|
}
|
||||||
tmposs << "\n";
|
tmposs << "\n";
|
||||||
}
|
}
|
||||||
|
@ -227,8 +218,7 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
|
||||||
const float d = GGML_FP16_TO_FP32(tmp[j * n + z].d);
|
const float d = GGML_FP16_TO_FP32(tmp[j * n + z].d);
|
||||||
for (int k = 0; k < QK8_0; k++) {
|
for (int k = 0; k < QK8_0; k++) {
|
||||||
value = tmp[j * n + z].qs[k] * d;
|
value = tmp[j * n + z].qs[k] * d;
|
||||||
tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
|
tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
|
||||||
<< " ";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tmposs << "\n";
|
tmposs << "\n";
|
||||||
|
@ -317,24 +307,19 @@ static void initialize_tensors(ggml_context * ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
printf(" " \
|
printf(
|
||||||
"\nUsage: test_qnn_ops [options]\n" \
|
" "
|
||||||
"\n" \
|
"\nUsage: test_qnn_ops [options]\n"
|
||||||
"Options:\n" \
|
"\n"
|
||||||
" -t GGML_OP_ADD / GGML_OP_MULMAT\n" \
|
"Options:\n"
|
||||||
" -b 0(QNN_CPU) 1(QNN_GPU) 2(QNN_NPU) 3(ggml)\n" \
|
" -t GGML_OP_ADD / GGML_OP_MULMAT\n"
|
||||||
" ?/h print usage infomation\n\n"
|
" -b 0(QNN_CPU) 1(QNN_GPU) 2(QNN_NPU) 3(ggml)\n"
|
||||||
);
|
" ?/h print usage infomation\n\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef ggml_tensor * (*ggml_op_unary_t)(
|
typedef ggml_tensor *(*ggml_op_unary_t)(ggml_context *ctx, ggml_tensor *a);
|
||||||
ggml_context * ctx,
|
|
||||||
ggml_tensor * a);
|
|
||||||
|
|
||||||
typedef ggml_tensor * (*ggml_op_binary_t)(
|
typedef ggml_tensor *(*ggml_op_binary_t)(ggml_context *ctx, ggml_tensor *a, ggml_tensor *b);
|
||||||
ggml_context * ctx,
|
|
||||||
ggml_tensor * a,
|
|
||||||
ggml_tensor * b);
|
|
||||||
|
|
||||||
static constexpr const ggml_op_unary_t kUnaryOps[] = {
|
static constexpr const ggml_op_unary_t kUnaryOps[] = {
|
||||||
nullptr, // GGML_OP_NONE
|
nullptr, // GGML_OP_NONE
|
||||||
|
@ -420,14 +405,11 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
|
||||||
n_begin_time = ggml_time_us();
|
n_begin_time = ggml_time_us();
|
||||||
|
|
||||||
ctx_size += 1024 * 1024 * 32;
|
ctx_size += 1024 * 1024 * 32;
|
||||||
QNN_LOG_DEBUG("Allocating Memory of size %zi bytes, %zi MB\n", ctx_size,
|
QNN_LOG_DEBUG("Allocating Memory of size %zi bytes, %zi MB\n", ctx_size, (ctx_size / 1024 / 1024));
|
||||||
(ctx_size / 1024 / 1024));
|
|
||||||
|
|
||||||
struct ggml_init_params params = {
|
struct ggml_init_params params = { /*.mem_size =*/ctx_size,
|
||||||
/*.mem_size =*/ ctx_size,
|
|
||||||
/*.mem_buffer =*/NULL,
|
/*.mem_buffer =*/NULL,
|
||||||
/* no_alloc =*/ 0
|
/* no_alloc =*/0 };
|
||||||
};
|
|
||||||
|
|
||||||
if (n_backend_type != QNN_BACKEND_GGML) {
|
if (n_backend_type != QNN_BACKEND_GGML) {
|
||||||
params.no_alloc = true;
|
params.no_alloc = true;
|
||||||
|
@ -470,8 +452,7 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
|
||||||
} else if (binary_op) {
|
} else if (binary_op) {
|
||||||
dst = binary_op(ctx, src0, src1);
|
dst = binary_op(ctx, src0, src1);
|
||||||
} else {
|
} else {
|
||||||
QNN_LOG_WARN("ggml op %d(%s) not supported", n_ggml_op_type,
|
QNN_LOG_WARN("ggml op %d(%s) not supported", n_ggml_op_type, ggml_op_name((enum ggml_op)n_ggml_op_type));
|
||||||
ggml_op_name((enum ggml_op) n_ggml_op_type));
|
|
||||||
ggml_free(ctx);
|
ggml_free(ctx);
|
||||||
ggml_backend_free(backend);
|
ggml_backend_free(backend);
|
||||||
return 3;
|
return 3;
|
||||||
|
@ -504,17 +485,17 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
|
||||||
TENSOR_DUMP(src1);
|
TENSOR_DUMP(src1);
|
||||||
TENSOR_DUMP(dst);
|
TENSOR_DUMP(dst);
|
||||||
} else {
|
} else {
|
||||||
QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
|
QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
|
||||||
src0->name,
|
", nb = (%5zi, %5zi, %5zi)\n",
|
||||||
src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
|
src0->name, src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
|
||||||
src0->nb[0], src0->nb[1], src0->nb[2]);
|
src0->nb[0], src0->nb[1], src0->nb[2]);
|
||||||
QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
|
QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
|
||||||
src1->name,
|
", nb = (%5zi, %5zi, %5zi)\n",
|
||||||
src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
|
src1->name, src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
|
||||||
src1->nb[0], src1->nb[1], src1->nb[2]);
|
src1->nb[0], src1->nb[1], src1->nb[2]);
|
||||||
QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
|
QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
|
||||||
dst->name,
|
", nb = (%5zi, %5zi, %5zi)\n",
|
||||||
dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
|
dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
|
||||||
dst->nb[1], dst->nb[2]);
|
dst->nb[1], dst->nb[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -524,20 +505,16 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
|
||||||
|
|
||||||
n_end_time = ggml_time_us();
|
n_end_time = ggml_time_us();
|
||||||
n_duration = (n_end_time - n_begin_time) / 1000;
|
n_duration = (n_end_time - n_begin_time) / 1000;
|
||||||
QNN_LOG_DEBUG("duration of ut GGML_OP_%s using QNN backend %s: %lld milliseconds\n", ggml_op_name((enum ggml_op)n_ggml_op_type), get_qnn_backend_name(n_backend_type), n_duration);
|
QNN_LOG_DEBUG("duration of ut GGML_OP_%s using QNN backend %s: %lld milliseconds\n",
|
||||||
|
ggml_op_name((enum ggml_op)n_ggml_op_type), get_qnn_backend_name(n_backend_type), n_duration);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DEFINE_OP(op) { #op, op }
|
#define DEFINE_OP(op) { #op, op }
|
||||||
|
|
||||||
static const std::unordered_map<std::string, int> kMapStringToGGMLOp = {
|
static const std::unordered_map<std::string, int> kMapStringToGGMLOp = {
|
||||||
DEFINE_OP(GGML_OP_ADD),
|
DEFINE_OP(GGML_OP_ADD), DEFINE_OP(GGML_OP_SUB), DEFINE_OP(GGML_OP_MUL), DEFINE_OP(GGML_OP_DIV),
|
||||||
DEFINE_OP(GGML_OP_SUB),
|
DEFINE_OP(GGML_OP_SQRT), DEFINE_OP(GGML_OP_MUL_MAT), DEFINE_OP(GGML_OP_LOG),
|
||||||
DEFINE_OP(GGML_OP_MUL),
|
|
||||||
DEFINE_OP(GGML_OP_DIV),
|
|
||||||
DEFINE_OP(GGML_OP_SQRT),
|
|
||||||
DEFINE_OP(GGML_OP_MUL_MAT),
|
|
||||||
DEFINE_OP(GGML_OP_LOG),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
|
@ -575,9 +552,9 @@ int main(int argc, char * argv[]) {
|
||||||
}
|
}
|
||||||
|
|
||||||
QNN_LOG_DEBUG("enter qnn_ggml_op\n");
|
QNN_LOG_DEBUG("enter qnn_ggml_op\n");
|
||||||
QNN_LOG_DEBUG("backend %d, ggml op:%d(%s)", n_backend_type, n_ggml_op_type, ggml_op_name((enum ggml_op) n_ggml_op_type));
|
QNN_LOG_DEBUG("backend %d, ggml op:%d(%s)", n_backend_type, n_ggml_op_type,
|
||||||
|
ggml_op_name((enum ggml_op)n_ggml_op_type));
|
||||||
qnn_op_ut(num_threads, n_backend_type, n_ggml_op_type);
|
qnn_op_ut(num_threads, n_backend_type, n_ggml_op_type);
|
||||||
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue