diff --git a/tests/ggml-qnn/ggml-qnn-ut.cpp b/tests/ggml-qnn/ggml-qnn-ut.cpp
index f19a6355d..fefb26244 100644
--- a/tests/ggml-qnn/ggml-qnn-ut.cpp
+++ b/tests/ggml-qnn/ggml-qnn-ut.cpp
@@ -1,67 +1,67 @@
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
 #include
-#include
+#include
 #include
+#include
 #include
-#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
+#include
+#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
+#include
 #include "ggml.h"
+
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 #include "ggml-qnn.h"
 
-#define GGML_QNN_DEBUG 1
+#define GGML_QNN_DEBUG      1
 #define GGML_QNN_LOGBUF_LEN 4096
 
-#define QNN_LOG_ERROR(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
-#define QNN_LOG_WARN(...)  ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
-#define QNN_LOG_INFO(...)  ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_ERROR(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_WARN(...)  ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_INFO(...)  ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
 
 #if GGML_QNN_DEBUG
-#define QNN_LOG_DEBUG(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_DEBUG(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
 #else
 #define QNN_LOG_DEBUG(...)
 #endif
 
-static void tensor_dump(const ggml_tensor * tensor, const char * name);
+static void tensor_dump(const ggml_tensor *tensor, const char *name);
 
 #define TENSOR_DUMP(tensor) tensor_dump(tensor, #tensor)
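The hunk below only reflows the signature of `ggml_qnn_log_internal` and shows the first two lines of its body. For context, a minimal sketch of this kind of mutex-guarded, fixed-buffer variadic logger (assuming `<mutex>`, `<cstdarg>` and `<cstdio>` are available; this is an illustration of the pattern, not the verbatim body):

```cpp
// Illustrative sketch only: serialize concurrent callers, format a
// "[file, func, line]" prefix, then append the caller's printf-style
// payload into one fixed-size buffer before emitting it.
static void example_log(const char *file, const char *func, int line, const char *format, ...) {
    static std::mutex log_mutex;
    static char       log_buf[GGML_QNN_LOGBUF_LEN];

    std::lock_guard<std::mutex> lock(log_mutex);
    int len = snprintf(log_buf, sizeof(log_buf), "[%s, %s, %d]: ", file, func, line);
    if (len < 0 || len >= (int)sizeof(log_buf)) {
        return; // prefix alone overflowed the buffer
    }

    va_list args;
    va_start(args, format);
    vsnprintf(log_buf + len, sizeof(log_buf) - len, format, args);
    va_end(args);

    printf("%s\n", log_buf);
}
```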
-static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const char * func, int line, const char * format, ...) {
+static void ggml_qnn_log_internal(ggml_log_level level, const char *file, const char *func, int line,
+                                  const char *format, ...) {
     static std::mutex ggml_qnn_log_internal_mutex;
     static char s_ggml_qnn_log_internal_buf[GGML_QNN_LOGBUF_LEN];
 
@@ -78,7 +78,7 @@ static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const
     }
 }
 
-static const char * get_qnn_backend_name(int n_backend_type) {
+static const char *get_qnn_backend_name(int n_backend_type) {
     switch (n_backend_type) {
         case QNN_BACKEND_CPU:
             return "QNN-CPU";
@@ -93,13 +93,9 @@ static const char * get_qnn_backend_name(int n_backend_type) {
     }
 }
 
-static bool ggml_graph_compute_helper(
-        struct ggml_backend * backend,
-        struct ggml_cgraph * graph,
-        std::vector<uint8_t> & buf,
-        int n_threads,
-        ggml_abort_callback abort_callback,
-        void * abort_callback_data) {
+static bool ggml_graph_compute_helper(struct ggml_backend *backend, struct ggml_cgraph *graph,
+                                      std::vector<uint8_t> &buf, int n_threads, ggml_abort_callback abort_callback,
+                                      void *abort_callback_data) {
     struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
 
     plan.abort_callback = abort_callback;
@@ -129,8 +125,8 @@ static bool ggml_graph_compute_helper(
 #define QK8_0 32
 typedef struct {
-    uint16_t d; // delta
-    int8_t qs[QK8_0]; // quants
+    uint16_t d;       // delta
+    int8_t qs[QK8_0]; // quants
 } block_q8_0;
 
 static inline float ggml_compute_fp16_to_fp32(uint16_t h) {
@@ -141,12 +137,11 @@ static inline float ggml_compute_fp16_to_fp32(uint16_t h) {
 
 #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
 
-static void tensor_dump(const ggml_tensor * tensor, const char * name) {
-    QNN_LOG_DEBUG("dump ggml tensor %s(%s): type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-          name, tensor->name,
-          tensor->type, ggml_type_name(tensor->type),
-          tensor->ne[0], tensor->ne[1], tensor->ne[2],
-          tensor->nb[0], tensor->nb[1], tensor->nb[2]);
+static void tensor_dump(const ggml_tensor *tensor, const char *name) {
+    QNN_LOG_DEBUG("dump ggml tensor %s(%s): type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                  ", nb = (%5zi, %5zi, %5zi)\n",
+                  name, tensor->name, tensor->type, ggml_type_name(tensor->type), tensor->ne[0], tensor->ne[1],
+                  tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]);
 
     float value = 0;
     std::ostringstream tmposs;
@@ -160,10 +155,8 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     for (int i = 0; i < tensor->ne[2]; i++) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
-                value = ((int8_t *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
-                        j * tensor->ne[0] + k];
-                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                       << " ";
+                value = ((int8_t *)tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
+                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
             }
             tmposs << "\n";
         }
@@ -181,10 +174,8 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     for (int i = 0; i < tensor->ne[2]; i++) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
-                value = ((float *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
-                        j * tensor->ne[0] + k];
-                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                       << " ";
+                value = ((float *)tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
+                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
             }
             tmposs << "\n";
         }
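The middle of `ggml_graph_compute_helper` is elided between the hunks above. The usual shape of this helper in llama.cpp's tests is to back the plan's scratch request with the caller's vector and then compute; a sketch under that assumption (status-enum names may vary across ggml revisions):

```cpp
// Sketch of the elided middle of ggml_graph_compute_helper, using the
// declarations shown above (plan, buf, backend, graph). Illustrative,
// not the verbatim body.
if (plan.work_size > 0) {
    buf.resize(plan.work_size); // scratch memory requested by the plan
    plan.work_data = buf.data();
}

if (backend != nullptr) {
    // compute through the (QNN) backend when one was created
    return ggml_backend_graph_compute(backend, graph) == GGML_STATUS_SUCCESS;
}
// otherwise run the default CPU path with the prepared plan
return ggml_graph_compute(graph, &plan) == GGML_STATUS_SUCCESS;
```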
@@ -202,11 +193,11 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     for (int i = 0; i < tensor->ne[2]; i++) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
-                unsigned short tmpvalue = ((unsigned short *) tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] +
-                        j * tensor->ne[0] + k];
+                unsigned short tmpvalue =
+                    ((unsigned short *)
+                         tensor->data)[h * tensor->ne[2] + i * tensor->ne[1] + j * tensor->ne[0] + k];
                 value = GGML_FP16_TO_FP32(tmpvalue);
-                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                       << " ";
+                tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
             }
             tmposs << "\n";
         }
@@ -220,15 +211,14 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     }
 
     if (tensor->type == GGML_TYPE_Q8_0) {
-        block_q8_0 * tmp = ((block_q8_0 *)tensor->data);
-        for (int j = 0; j < tensor->ne[1]; j++) {
-            int n = tensor->ne[0] / QK8_0; //blocks per row
+        block_q8_0 *tmp = ((block_q8_0 *)tensor->data);
+        for (int j = 0; j < tensor->ne[1]; j++) {
+            int n = tensor->ne[0] / QK8_0; // blocks per row
             for (int z = 0; z < n; z++) {
-                const float d = GGML_FP16_TO_FP32(tmp[ j * n + z ].d);
+                const float d = GGML_FP16_TO_FP32(tmp[j * n + z].d);
                 for (int k = 0; k < QK8_0; k++) {
                     value = tmp[j * n + z].qs[k] * d;
-                    tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value
-                           << " ";
+                    tmposs << std::setw(8) << std::fixed << std::setprecision(2) << value << " ";
                 }
             }
             tmposs << "\n";
@@ -241,7 +231,7 @@ static void tensor_dump(const ggml_tensor * tensor, const char * name) {
     }
 }
 
-static uint32_t get_tensor_rank(const ggml_tensor * tensor) {
+static uint32_t get_tensor_rank(const ggml_tensor *tensor) {
     uint32_t rank = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
         if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) {
@@ -251,7 +241,7 @@ static uint32_t get_tensor_rank(const ggml_tensor * tensor) {
     return rank;
 }
 
-static uint32_t get_tensor_data_size(const ggml_tensor * tensor) {
+static uint32_t get_tensor_data_size(const ggml_tensor *tensor) {
     size_t data_size = ggml_row_size(tensor->type, tensor->ne[0]);
     size_t n_dims = get_tensor_rank(tensor);
     for (int i = 1; i < n_dims; i++) {
@@ -264,8 +254,8 @@ static uint32_t get_tensor_data_size(const ggml_tensor * tensor) {
     return ggml_nbytes(tensor);
 }
 
-//ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L20
-static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
+// ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L20
+static void init_tensor_uniform(ggml_tensor *tensor, float min = -1.0f, float max = 1.0f) {
     size_t size = ggml_nelements(tensor);
     std::vector<float> data(size);
     for (size_t i = 0; i < size; i++) {
@@ -274,7 +264,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
 
     if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
 #ifdef GGML_USE_QNN
-        memcpy((char*)tensor->data, data.data(), size * sizeof(float));
+        memcpy((char *)tensor->data, data.data(), size * sizeof(float));
 #else
         ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
 #endif
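The GGML_TYPE_Q8_0 branch of `tensor_dump` above reconstructs each element as `qs[k] * d`, where `d` is the per-block fp16 scale. Pulled out of the dump loop, the same dequantization reads as follows (self-contained sketch; `block_q8_0`, `QK8_0` and `GGML_FP16_TO_FP32` are the declarations from this file):

```cpp
// Dequantize one row of Q8_0 data: every block of QK8_0 (= 32) int8
// quants shares a single fp16 scale d, so value = qs[k] * d.
static void dequantize_row_q8_0_example(const block_q8_0 *blocks, float *out, int ne0) {
    const int n_blocks = ne0 / QK8_0; // blocks per row
    for (int z = 0; z < n_blocks; z++) {
        const float d = GGML_FP16_TO_FP32(blocks[z].d);
        for (int k = 0; k < QK8_0; k++) {
            out[z * QK8_0 + k] = blocks[z].qs[k] * d;
        }
    }
}
```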
@@ -282,25 +272,25 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
         GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
         std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
         std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
-        const float * im = imatrix.data();
+        const float *im = imatrix.data();
         if (!ggml_quantize_requires_imatrix(tensor->type)) {
             // when the imatrix is optional, we want to test both quantization with and without imatrix
             // use one of the random numbers to decide
-            if (data[0] > 0.5f*(min + max)) {
+            if (data[0] > 0.5f * (min + max)) {
                 im = nullptr;
             }
         }
-        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
+        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size / tensor->ne[0], tensor->ne[0], im);
         GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size()));
 #ifdef GGML_USE_QNN
-        memcpy((char*)tensor->data, dataq.data(), dataq.size());
+        memcpy((char *)tensor->data, dataq.data(), dataq.size());
 #else
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
 #endif
     } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
         // This is going to create some weird integers though.
 #ifdef GGML_USE_QNN
-        memcpy((char*)tensor->data, data.data(), ggml_nbytes(tensor));
+        memcpy((char *)tensor->data, data.data(), ggml_nbytes(tensor));
 #else
         ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
 #endif
@@ -309,125 +299,117 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
     }
 }
 
-//ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L310
-static void initialize_tensors(ggml_context * ctx) {
-    for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
+// ref: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-backend-ops.cpp#L310
+static void initialize_tensors(ggml_context *ctx) {
+    for (ggml_tensor *t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
         init_tensor_uniform(t);
     }
 }
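The actual random fill in `init_tensor_uniform` is elided by the hunks above; the test-backend-ops.cpp file it cites draws from a uniform distribution, so a plausible stand-in (the generator and seed here are illustrative, not taken from the source) is:

```cpp
// Hypothetical helper matching init_tensor_uniform's [min, max] contract;
// requires <random>. A fixed seed keeps test runs reproducible.
static void fill_uniform_example(float *data, size_t size, float min, float max) {
    static std::default_random_engine rng(1234);
    std::uniform_real_distribution<float> dist(min, max);
    for (size_t i = 0; i < size; i++) {
        data[i] = dist(rng);
    }
}
```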
 
 static void show_usage() {
-    printf(" " \
-        "\nUsage: test_qnn_ops [options]\n" \
-        "\n" \
-        "Options:\n" \
-        " -t GGML_OP_ADD / GGML_OP_MULMAT\n" \
-        " -b 0(QNN_CPU) 1(QNN_GPU) 2(QNN_NPU) 3(ggml)\n" \
-        " ?/h print usage infomation\n\n"
-    );
+    printf(
+        " "
+        "\nUsage: test_qnn_ops [options]\n"
+        "\n"
+        "Options:\n"
+        " -t GGML_OP_ADD / GGML_OP_MULMAT\n"
+        " -b 0(QNN_CPU) 1(QNN_GPU) 2(QNN_NPU) 3(ggml)\n"
+        " ?/h print usage information\n\n");
 }
 
-typedef ggml_tensor * (*ggml_op_unary_t)(
-        ggml_context * ctx,
-        ggml_tensor * a);
+typedef ggml_tensor *(*ggml_op_unary_t)(ggml_context *ctx, ggml_tensor *a);
 
-typedef ggml_tensor * (*ggml_op_binary_t)(
-        ggml_context * ctx,
-        ggml_tensor * a,
-        ggml_tensor * b);
+typedef ggml_tensor *(*ggml_op_binary_t)(ggml_context *ctx, ggml_tensor *a, ggml_tensor *b);
 
 static constexpr const ggml_op_unary_t kUnaryOps[] = {
-    nullptr, // GGML_OP_NONE
-    nullptr, // GGML_OP_DUP
-    nullptr, // GGML_OP_ADD
-    nullptr, // GGML_OP_ADD1
-    nullptr, // GGML_OP_ACC
-    nullptr, // GGML_OP_SUB
-    nullptr, // GGML_OP_MUL
-    nullptr, // GGML_OP_DIV
-    nullptr, // GGML_OP_SQR
-    ggml_sqrt, // GGML_OP_SQRT
-    ggml_log, // GGML_OP_LOG
-    nullptr, // GGML_OP_SUM
-    nullptr, // GGML_OP_SUM_ROWS
-    nullptr, // GGML_OP_MEAN
-    nullptr, // GGML_OP_ARGMAX
-    nullptr, // GGML_OP_REPEAT
-    nullptr, // GGML_OP_REPEAT_BACK
-    nullptr, // GGML_OP_CONCAT
-    nullptr, // GGML_OP_SILU_BACK
-    nullptr, // GGML_OP_NORM
-    nullptr, // GGML_OP_RMS_NORM
-    nullptr, // GGML_OP_RMS_NORM_BACK
-    nullptr, // GGML_OP_GROUP_NORM
-    nullptr, // GGML_OP_MUL_MAT
+    nullptr,   // GGML_OP_NONE
+    nullptr,   // GGML_OP_DUP
+    nullptr,   // GGML_OP_ADD
+    nullptr,   // GGML_OP_ADD1
+    nullptr,   // GGML_OP_ACC
+    nullptr,   // GGML_OP_SUB
+    nullptr,   // GGML_OP_MUL
+    nullptr,   // GGML_OP_DIV
+    nullptr,   // GGML_OP_SQR
+    ggml_sqrt, // GGML_OP_SQRT
+    ggml_log,  // GGML_OP_LOG
+    nullptr,   // GGML_OP_SUM
+    nullptr,   // GGML_OP_SUM_ROWS
+    nullptr,   // GGML_OP_MEAN
+    nullptr,   // GGML_OP_ARGMAX
+    nullptr,   // GGML_OP_REPEAT
+    nullptr,   // GGML_OP_REPEAT_BACK
+    nullptr,   // GGML_OP_CONCAT
+    nullptr,   // GGML_OP_SILU_BACK
+    nullptr,   // GGML_OP_NORM
+    nullptr,   // GGML_OP_RMS_NORM
+    nullptr,   // GGML_OP_RMS_NORM_BACK
+    nullptr,   // GGML_OP_GROUP_NORM
+    nullptr,   // GGML_OP_MUL_MAT
 };
 
 static constexpr const ggml_op_binary_t kBinaryOps[] = {
-    nullptr, // GGML_OP_NONE
-    nullptr, // GGML_OP_DUP
-    ggml_add, // GGML_OP_ADD
-    nullptr, // GGML_OP_ADD1
-    nullptr, // GGML_OP_ACC
-    ggml_sub, // GGML_OP_SUB
-    ggml_mul, // GGML_OP_MUL
-    ggml_div, // GGML_OP_DIV
-    nullptr, // GGML_OP_SQR
-    nullptr, // GGML_OP_SQRT
-    nullptr, // GGML_OP_LOG
-    nullptr, // GGML_OP_SUM
-    nullptr, // GGML_OP_SUM_ROWS
-    nullptr, // GGML_OP_MEAN
-    nullptr, // GGML_OP_ARGMAX
-    nullptr, // GGML_OP_REPEAT
-    nullptr, // GGML_OP_REPEAT_BACK
-    nullptr, // GGML_OP_CONCAT
-    nullptr, // GGML_OP_SILU_BACK
-    nullptr, // GGML_OP_NORM
-    nullptr, // GGML_OP_RMS_NORM
-    nullptr, // GGML_OP_RMS_NORM_BACK
-    nullptr, // GGML_OP_GROUP_NORM
-    ggml_mul_mat, // GGML_OP_MUL_MAT
+    nullptr,      // GGML_OP_NONE
+    nullptr,      // GGML_OP_DUP
+    ggml_add,     // GGML_OP_ADD
+    nullptr,      // GGML_OP_ADD1
+    nullptr,      // GGML_OP_ACC
+    ggml_sub,     // GGML_OP_SUB
+    ggml_mul,     // GGML_OP_MUL
+    ggml_div,     // GGML_OP_DIV
+    nullptr,      // GGML_OP_SQR
+    nullptr,      // GGML_OP_SQRT
+    nullptr,      // GGML_OP_LOG
+    nullptr,      // GGML_OP_SUM
+    nullptr,      // GGML_OP_SUM_ROWS
+    nullptr,      // GGML_OP_MEAN
+    nullptr,      // GGML_OP_ARGMAX
+    nullptr,      // GGML_OP_REPEAT
+    nullptr,      // GGML_OP_REPEAT_BACK
+    nullptr,      // GGML_OP_CONCAT
+    nullptr,      // GGML_OP_SILU_BACK
+    nullptr,      // GGML_OP_NORM
+    nullptr,      // GGML_OP_RMS_NORM
+    nullptr,      // GGML_OP_RMS_NORM_BACK
+    nullptr,      // GGML_OP_GROUP_NORM
+    ggml_mul_mat, // GGML_OP_MUL_MAT
 };
 
 static_assert(kBinaryOps[GGML_OP_MUL_MAT] == ggml_mul_mat, "ggml_mul_mat at wrong index, check kBinaryOps");
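The static_assert above pins `ggml_mul_mat` to the GGML_OP_MUL_MAT slot. The way `qnn_op_ut` consumes these tables (visible in fragments further down) amounts to the lookup below; this standalone sketch adds an explicit bounds check that the original leaves implicit:

```cpp
// Dispatch sketch over the two op tables above: try the unary table
// first, then the binary one; a null entry means "not wired up".
static ggml_tensor *dispatch_op_example(ggml_context *ctx, int op, ggml_tensor *a, ggml_tensor *b) {
    constexpr int n_ops = sizeof(kBinaryOps) / sizeof(kBinaryOps[0]); // same length as kUnaryOps
    if (op < 0 || op >= n_ops) {
        return nullptr; // op index outside both tables
    }
    if (ggml_op_unary_t unary_op = kUnaryOps[op]) {
        return unary_op(ctx, a); // e.g. GGML_OP_SQRT, GGML_OP_LOG
    }
    if (ggml_op_binary_t binary_op = kBinaryOps[op]) {
        return binary_op(ctx, a, b); // e.g. GGML_OP_ADD, GGML_OP_MUL_MAT
    }
    return nullptr; // listed but unsupported
}
```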
 
 static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
-    int64_t n_begin_time = 0LL;
-    int64_t n_end_time = 0LL;
-    int64_t n_duration = 0LL;
-    size_t ctx_size = 0;
-    int sizey = 4;
-    int sizex = 4;
+    int64_t n_begin_time = 0LL;
+    int64_t n_end_time   = 0LL;
+    int64_t n_duration   = 0LL;
+    size_t  ctx_size     = 0;
+    int     sizey        = 4;
+    int     sizex        = 4;
 
-    struct ggml_context * ctx = nullptr;
-    struct ggml_cgraph * gf = nullptr;
-    struct ggml_tensor * src0 = nullptr;
-    struct ggml_tensor * src1 = nullptr;
-    struct ggml_tensor * dst = nullptr;
-    ggml_backend_t backend = nullptr;
-    ggml_backend_buffer_t buffer= nullptr;
+    struct ggml_context  *ctx  = nullptr;
+    struct ggml_cgraph   *gf   = nullptr;
+    struct ggml_tensor   *src0 = nullptr;
+    struct ggml_tensor   *src1 = nullptr;
+    struct ggml_tensor   *dst  = nullptr;
+    ggml_backend_t        backend = nullptr;
+    ggml_backend_buffer_t buffer  = nullptr;
 
-    ggml_type qtype = GGML_TYPE_I8;
-    qtype = GGML_TYPE_F16;
-    qtype = GGML_TYPE_Q8_0;
-    qtype = GGML_TYPE_F32;
+    ggml_type qtype = GGML_TYPE_I8;
+    qtype           = GGML_TYPE_F16;
+    qtype           = GGML_TYPE_Q8_0;
+    qtype           = GGML_TYPE_F32;
 
     std::vector<uint8_t> work_buffer;
 
     QNN_LOG_DEBUG("enter qnn_ggml_op\n");
-    QNN_LOG_DEBUG("ggml op:%d(%s)\n", n_ggml_op_type, ggml_op_name((enum ggml_op) n_ggml_op_type));
+    QNN_LOG_DEBUG("ggml op:%d(%s)\n", n_ggml_op_type, ggml_op_name((enum ggml_op)n_ggml_op_type));
 
     n_begin_time = ggml_time_us();
 
     ctx_size += 1024 * 1024 * 32;
-    QNN_LOG_DEBUG("Allocating Memory of size %zi bytes, %zi MB\n", ctx_size,
-                  (ctx_size / 1024 / 1024));
+    QNN_LOG_DEBUG("Allocating Memory of size %zi bytes, %zi MB\n", ctx_size, (ctx_size / 1024 / 1024));
 
-    struct ggml_init_params params = {
-        /*.mem_size =*/ ctx_size,
-        /*.mem_buffer =*/ NULL,
-        /* no_alloc =*/ 0
-    };
+    struct ggml_init_params params = { /*.mem_size =*/ctx_size,
+                                       /*.mem_buffer =*/NULL,
+                                       /* no_alloc =*/0 };
 
     if (n_backend_type != QNN_BACKEND_GGML) {
         params.no_alloc = true;
@@ -470,8 +452,7 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
     } else if (binary_op) {
         dst = binary_op(ctx, src0, src1);
     } else {
-        QNN_LOG_WARN("ggml op %d(%s) not supported", n_ggml_op_type,
-                     ggml_op_name((enum ggml_op) n_ggml_op_type));
+        QNN_LOG_WARN("ggml op %d(%s) not supported", n_ggml_op_type, ggml_op_name((enum ggml_op)n_ggml_op_type));
         ggml_free(ctx);
         ggml_backend_free(backend);
         return 3;
@@ -504,17 +485,17 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
         TENSOR_DUMP(src1);
         TENSOR_DUMP(dst);
     } else {
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-              src0->name,
-              src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      src0->name, src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
                       src0->nb[0], src0->nb[1], src0->nb[2]);
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-              src1->name,
-              src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      src1->name, src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
                       src1->nb[0], src1->nb[1], src1->nb[2]);
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-              dst->name,
-              dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
                       dst->nb[1], dst->nb[2]);
     }
 
@@ -524,26 +505,22 @@ static int qnn_op_ut(int num_threads, int n_backend_type, int n_ggml_op_type) {
     n_end_time = ggml_time_us();
     n_duration = (n_end_time - n_begin_time) / 1000;
-    QNN_LOG_DEBUG("duration of ut GGML_OP_%s using QNN backend %s: %lld milliseconds\n", ggml_op_name((enum ggml_op)n_ggml_op_type), get_qnn_backend_name(n_backend_type), n_duration);
+    QNN_LOG_DEBUG("duration of ut GGML_OP_%s using QNN backend %s: %lld milliseconds\n",
+                  ggml_op_name((enum ggml_op)n_ggml_op_type), get_qnn_backend_name(n_backend_type), n_duration);
     return 0;
 }
 
 #define DEFINE_OP(op) { #op, op }
 
 static const std::unordered_map<std::string, int> kMapStringToGGMLOp = {
-    DEFINE_OP(GGML_OP_ADD),
-    DEFINE_OP(GGML_OP_SUB),
-    DEFINE_OP(GGML_OP_MUL),
-    DEFINE_OP(GGML_OP_DIV),
-    DEFINE_OP(GGML_OP_SQRT),
-    DEFINE_OP(GGML_OP_MUL_MAT),
-    DEFINE_OP(GGML_OP_LOG),
+    DEFINE_OP(GGML_OP_ADD),  DEFINE_OP(GGML_OP_SUB),     DEFINE_OP(GGML_OP_MUL), DEFINE_OP(GGML_OP_DIV),
+    DEFINE_OP(GGML_OP_SQRT), DEFINE_OP(GGML_OP_MUL_MAT), DEFINE_OP(GGML_OP_LOG),
};
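The body of the "-t" branch in main is elided in the hunks below; given kMapStringToGGMLOp above, the lookup presumably takes this shape (the error handling is a guess, mirroring the "-b" branch that is shown):

```cpp
// Plausible reconstruction of the elided "-t" handling in main(): map the
// op name string (e.g. "GGML_OP_ADD") to its enum value via the table above.
if (i + 1 < argc) {
    auto it = kMapStringToGGMLOp.find(argv[i + 1]);
    if (it != kMapStringToGGMLOp.end()) {
        n_ggml_op_type = it->second;
        i++; // consume the option's argument
    } else {
        show_usage();
        return 1;
    }
}
```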
 
-int main(int argc, char * argv[]) {
-    int num_threads = 4;
-    int n_backend_type = QNN_BACKEND_CPU;
-    int n_ggml_op_type = GGML_OP_ADD;
+int main(int argc, char *argv[]) {
+    int num_threads    = 4;
+    int n_backend_type = QNN_BACKEND_CPU;
+    int n_ggml_op_type = GGML_OP_ADD;
 
     for (int i = 1; i < argc; i++) {
         if (0 == strcmp(argv[i], "-t")) {
@@ -561,7 +538,7 @@ int main(int argc, char * argv[]) {
             if (i + 1 < argc) {
                 int backend = atoi(argv[i + 1]);
                 if (backend <= QNN_BACKEND_GGML)
-                   n_backend_type = backend;
+                    n_backend_type = backend;
                 else {
                     show_usage();
                     return 1;
@@ -575,9 +552,9 @@ int main(int argc, char * argv[]) {
     }
 
     QNN_LOG_DEBUG("enter qnn_ggml_op\n");
-    QNN_LOG_DEBUG("backend %d, ggml op:%d(%s)", n_backend_type, n_ggml_op_type, ggml_op_name((enum ggml_op) n_ggml_op_type));
+    QNN_LOG_DEBUG("backend %d, ggml op:%d(%s)", n_backend_type, n_ggml_op_type,
+                  ggml_op_name((enum ggml_op)n_ggml_op_type));
 
     qnn_op_ut(num_threads, n_backend_type, n_ggml_op_type);
-
     return 0;
 }
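For reference, given the option list printed by show_usage, a typical invocation of the rebuilt binary would be `test_qnn_ops -t GGML_OP_MUL_MAT -b 2` (run MUL_MAT on the QNN NPU backend), while `-b 3` exercises the plain ggml path for comparison.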