add clang-format file and reformatting
This commit is contained in:
parent 38f88d5fb1
commit 000240cf62

12 changed files with 1514 additions and 1809 deletions
@@ -1,41 +1,48 @@
 #pragma once

-#include "ggml.h"
 #include "ggml-backend.h"
+#include "ggml.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

-#define GGML_QNN_MAX_DEVICES 3
+#define GGML_QNN_MAX_DEVICES 3

 enum QNNBackend {
-    QNN_BACKEND_CPU,
-    QNN_BACKEND_GPU,
-    QNN_BACKEND_NPU,
-    QNN_BACKEND_GGML, //"fake" QNN backend, used for compare performance between QNN and original GGML
+    QNN_BACKEND_CPU,
+    QNN_BACKEND_GPU,
+    QNN_BACKEND_NPU,
+    QNN_BACKEND_GGML, //"fake" QNN backend, used for compare performance between
+                      // QNN and original GGML
 };

-GGML_API int ggml_backend_qnn_reg_devices(void);
+GGML_API int ggml_backend_qnn_reg_devices(void);

 /**
  *
- * @param device 0: QNN_BACKEND_CPU 1: QNN_BACKEND_GPU 2: QNN_BACKEND_NPU
- * @param qnn_lib_path qnn library path, such as "/data/local/tmp/" on Android or specified in JNI layer
+ * @param device       0: QNN_BACKEND_CPU 1: QNN_BACKEND_GPU 2:
+ *                     QNN_BACKEND_NPU
+ * @param qnn_lib_path qnn library path, such as "/data/local/tmp/" on
+ *                     Android or specified in JNI layer
  * @return
  */
-GGML_API ggml_backend_t ggml_backend_qnn_init(size_t dev_num, const char * qnn_lib_path);
+GGML_API ggml_backend_t ggml_backend_qnn_init(size_t dev_num,
+                                              const char* qnn_lib_path);

-GGML_API bool ggml_backend_is_qnn(ggml_backend_t backend);
+GGML_API bool ggml_backend_is_qnn(ggml_backend_t backend);

-GGML_API void ggml_backend_qnn_set_n_threads(ggml_backend_t backend, int thread_counts);
+GGML_API void ggml_backend_qnn_set_n_threads(ggml_backend_t backend,
+                                             int thread_counts);

-GGML_API int ggml_backend_qnn_get_device_count(void);
+GGML_API int ggml_backend_qnn_get_device_count(void);

-GGML_API void ggml_backend_qnn_get_device_description(size_t dev_num, char * description, size_t description_size);
+GGML_API void ggml_backend_qnn_get_device_description(size_t dev_num,
+                                                      char* description,
+                                                      size_t description_size);

-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t dev_num);
+GGML_API GGML_CALL ggml_backend_buffer_type_t
+ggml_backend_qnn_buffer_type(size_t dev_num);

 #ifdef __cplusplus
 }
ggml/src/ggml-qnn/.clang-format (new file, 31 lines)

@@ -0,0 +1,31 @@
+---
+BasedOnStyle: Google
+IndentWidth: 4
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignOperands: true
+AlignTrailingComments: true
+BinPackArguments: true
+BinPackParameters: true
+BreakBeforeBraces: Custom
+BreakConstructorInitializers: AfterColon
+ColumnLimit: 120
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+IncludeCategories:
+  - Regex: '^<.*\.h>'
+    Priority: 1
+  - Regex: '^<.*'
+    Priority: 2
+  - Regex: '^"ggml\.h"'
+    Priority: 3
+  - Regex: '^"ggml-.+\.h"'
+    Priority: 4
+  - Regex: '.*'
+    Priority: 5
+KeepEmptyLinesAtTheStartOfBlocks: true
+MaxEmptyLinesToKeep: 1
+PointerAlignment: Right
+SortIncludes: true
+SpacesBeforeTrailingComments: 1
+UseTab: Never
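This configuration drives all of the reformatting below. As a quick illustration (this snippet is mine, not part of the commit), applying it with "clang-format -i" to the sources under ggml/src/ggml-qnn/ produces declarations like the following: PointerAlignment: Right binds the asterisk to the name, ColumnLimit: 120 keeps long parameter lists on one line, and AlignAfterOpenBracket: Align lines up the overflow under the opening parenthesis.

    // Illustrative only -- a declaration formatted per the options above:
    static void example_op(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
                           ggml_tensor *dst);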
@@ -1,22 +1,21 @@

 #include "backend-ops.hpp"

-#include "utils.hpp"
 #include "logger.hpp"
 #include "tensor.hpp"
+#include "utils.hpp"

-static bool qnn_is_valid_params(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
-                                const ggml_tensor* src1, ggml_tensor* dst) {
+static bool qnn_is_valid_params(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                                ggml_tensor *dst) {
     if (!ctx || !src0 || !src1 || !dst) {
         QNN_LOG_WARN("invalid params\n");
         return false;
     }

-    auto* instance = ctx->instance;
-    auto* tensor0 = src0->extra;
-    auto* tensor1 = src1->extra;
-    auto* tensor2 = dst->extra;
+    auto *instance = ctx->instance;
+    auto *tensor0 = src0->extra;
+    auto *tensor1 = src1->extra;
+    auto *tensor2 = dst->extra;
     if (!instance || !tensor0 || !tensor1 || !tensor2) {
         QNN_LOG_WARN("invalid tensors\n");
         return false;
@@ -26,28 +25,28 @@ static bool qnn_is_valid_params(ggml_backend_qnn_context* ctx, const ggml_tensor
 }

 #ifndef NDEBUG
-#define CHECK_PARAMS(ctx, src0, src1, dst)                         \
-    do {                                                           \
-        if (!qnn_is_valid_params((ctx), (src0), (src1), (dst))) {  \
-            return;                                                \
-        }                                                          \
+#define CHECK_PARAMS(ctx, src0, src1, dst)                        \
+    do {                                                          \
+        if (!qnn_is_valid_params((ctx), (src0), (src1), (dst))) { \
+            return;                                               \
+        }                                                         \
     } while (0)

 #else
 #define CHECK_PARAMS(ctx, src0, src1, dst)
 #endif
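A side note on the macro's shape (my commentary, not part of the diff): the do { ... } while (0) wrapper makes CHECK_PARAMS expand to a single statement, so it composes safely with un-braced control flow. In this hypothetical caller, use_qnn and fallback() are invented names for illustration:

    if (use_qnn)
        CHECK_PARAMS(ctx, src0, src1, dst); // still one statement after expansion
    else
        fallback(); // without the do/while(0) wrapper, this else would not compile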

-//TODO: this function can be removed later because there are duplicated codes with ggml_qnn_mul_mat
-//      keep it for illustrate how to implement a specified GGMPL OP using QNN API + QNN RPC
-static void ggml_qnn_add(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
-                         const ggml_tensor* src1, ggml_tensor* dst) {
-    Qnn_ErrorHandle_t error = QNN_SUCCESS;
-    bool graph_initialized = false;
-    qnn::qnn_instance* instance = nullptr;
-    std::string graph_name = "ggml_op_qnn_add";
-    Qnn_GraphHandle_t graph_handle = nullptr;
-    Qnn_Param_t qnn_params[] = {};
-    enum ggml_op ggmlop = GGML_OP_ADD;
+// TODO: this function can be removed later because there are duplicated codes with ggml_qnn_mul_mat
+//       keep it for illustrate how to implement a specified GGMPL OP using QNN API + QNN RPC
+static void ggml_qnn_add(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                         ggml_tensor *dst) {
+    Qnn_ErrorHandle_t error = QNN_SUCCESS;
+    bool graph_initialized = false;
+    qnn::qnn_instance *instance = nullptr;
+    std::string graph_name = "ggml_op_qnn_add";
+    Qnn_GraphHandle_t graph_handle = nullptr;
+    Qnn_Param_t qnn_params[] = {};
+    enum ggml_op ggmlop = GGML_OP_ADD;

     CHECK_PARAMS(ctx, src0, src1, dst);
     instance = ctx->instance;
@@ -57,16 +56,14 @@ static void ggml_qnn_add(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
     perf.start();

     std::string map_entry(ggml_op_name(ggmlop));
-    if (instance->_qnn_graph_map.find(map_entry) !=
-        instance->_qnn_graph_map.end()) {
+    if (instance->_qnn_graph_map.find(map_entry) != instance->_qnn_graph_map.end()) {
         graph_initialized = true;
-        auto& graph_item = instance->_qnn_graph_map[map_entry];
+        auto &graph_item = instance->_qnn_graph_map[map_entry];
         graph_handle = std::get<0>(graph_item);
     }

     if (!graph_initialized) {
-        graph_name = graph_name + "_" + std::to_string(ctx->threads) +
-                     "_" + src0->name + "_" + src1->name;
+        graph_name = graph_name + "_" + std::to_string(ctx->threads) + "_" + src0->name + "_" + src1->name;
         QNN_LOG_INFO("graph name %s", graph_name.c_str());
         if (ctx->device == QNN_BACKEND_NPU) {
             QnnHtpGraph_CustomConfig_t hvx_config;
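Worth calling out in this hunk (my commentary, not part of the diff): the backend memoizes finalized QNN graphs in instance->_qnn_graph_map, keyed by the op name, so repeated executions of the same op skip the expensive graphCreate/graphFinalize path and go straight to graphExecute. A minimal sketch of the same pattern, with hypothetical simplified types (needs <string> and <unordered_map>):

    static std::unordered_map<std::string, Qnn_GraphHandle_t> graph_cache;
    auto it = graph_cache.find(map_entry);
    if (it == graph_cache.end()) {
        // ... graphCreate + graphAddNode + graphFinalize, then:
        graph_cache[map_entry] = graph_handle; // build once
    } else {
        graph_handle = it->second;             // execute many times
    }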
@@ -86,7 +83,7 @@ static void ggml_qnn_add(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,

             QnnHtpGraph_CustomConfig_t opt_config;
             opt_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG;
-            opt_config.optimizationOption.floatValue = 1; // 1 / 3
+            opt_config.optimizationOption.floatValue = 1; // 1 / 3
             QnnGraph_Config_t graph_opt_config;
             graph_opt_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
             graph_opt_config.customConfig = &opt_config;
@@ -98,28 +95,22 @@ static void ggml_qnn_add(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
             graph_vtcm_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
             graph_vtcm_config.customConfig = &vtcm_config;

-            const QnnGraph_Config_t* p_graphconfig[] = { &graph_hvx_config,
-                                                         &graph_dlbc_config,
-                                                         &graph_vtcm_config,
-                                                         &graph_opt_config,
-                                                         NULL };
-            error = qnn_raw_interface.graphCreate(
-                instance->get_qnn_context_handle(), graph_name.c_str(), p_graphconfig,
-                &graph_handle);
-        }
-        else {
-            error = qnn_raw_interface.graphCreate(
-                instance->get_qnn_context_handle(), graph_name.c_str(), nullptr,
-                &graph_handle);
+            const QnnGraph_Config_t *p_graphconfig[] = { &graph_hvx_config, &graph_dlbc_config, &graph_vtcm_config,
+                                                         &graph_opt_config, NULL };
+            error = qnn_raw_interface.graphCreate(instance->get_qnn_context_handle(), graph_name.c_str(), p_graphconfig,
+                                                  &graph_handle);
+        } else {
+            error = qnn_raw_interface.graphCreate(instance->get_qnn_context_handle(), graph_name.c_str(), nullptr,
+                                                  &graph_handle);
         }

         if (QNN_SUCCESS != error) {
-            QNN_LOG_INFO("can't create qnn graph handle with graph name %s, "
+            QNN_LOG_INFO(
+                "can't create qnn graph handle with graph name %s, "
                 "error = %d\n",
                 graph_name.c_str(), error);
             goto failure;
-        }
-        else {
+        } else {
             QNN_LOG_INFO("create qnn graph handle with graph name %s ok\n", graph_name.c_str());
         }

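A note on the pattern above (my commentary): QNN graph options are passed as a NULL-terminated array of QnnGraph_Config_t pointers, each of which wraps one HTP custom config. A hedged sketch of composing such an array, assuming the QnnHtpGraph.h field names this backend uses elsewhere (the 8 MB value is illustrative):

    QnnHtpGraph_CustomConfig_t vtcm_config;
    vtcm_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE;
    vtcm_config.vtcmSizeInMB = 8;                       // assumed size, for illustration
    QnnGraph_Config_t graph_vtcm_config;
    graph_vtcm_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
    graph_vtcm_config.customConfig = &vtcm_config;
    const QnnGraph_Config_t *configs[] = { &graph_vtcm_config, NULL }; // NULL terminator is required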
@@ -139,30 +130,20 @@ static void ggml_qnn_add(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,

         Qnn_Tensor_t tensor_inputs[] = { *tensor_input0.get_qnn_tensor(), *tensor_input1.get_qnn_tensor() };
         Qnn_Tensor_t tensor_outputs[] = { *tensor_output.get_qnn_tensor() };
-        Qnn_OpConfig_t op_config = {
-            (Qnn_OpConfigVersion_t)1,
-            .v1 = {"ggml_op_add",
-                   QNN_OP_PACKAGE_NAME_QTI_AISW,
-                   QNN_OP_ELEMENT_WISE_ADD,
-                   0, qnn_params,
-                   2, tensor_inputs,
-                   1,tensor_outputs}
-        };
+        Qnn_OpConfig_t op_config = { (Qnn_OpConfigVersion_t)1,
+                                     .v1 = { "ggml_op_add", QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_ELEMENT_WISE_ADD, 0,
+                                             qnn_params, 2, tensor_inputs, 1, tensor_outputs } };
         error = qnn_raw_interface.graphAddNode(graph_handle, op_config);
         if (QNN_SUCCESS != error) {
             QNN_LOG_INFO("error = %d\n", error);
             goto failure;
         }
-        error = qnn_raw_interface.graphFinalize(graph_handle,
-                                                nullptr, nullptr);
+        error = qnn_raw_interface.graphFinalize(graph_handle, nullptr, nullptr);
         if (QNN_SUCCESS != error) {
             QNN_LOG_INFO("error = %d\n", error);
             goto failure;
         }
-        error = qnn_raw_interface.graphExecute(graph_handle,
-                                               tensor_inputs, 2,
-                                               tensor_outputs, 1,
-                                               nullptr, nullptr);
+        error = qnn_raw_interface.graphExecute(graph_handle, tensor_inputs, 2, tensor_outputs, 1, nullptr, nullptr);
         if (ctx->device == QNN_BACKEND_NPU) {
             if (QNN_COMMON_ERROR_SYSTEM_COMMUNICATION == error) {
                 QNN_LOG_WARN("NPU crashed. SSR detected. Caused QNN graph execute error\n");
@@ -173,24 +154,18 @@ static void ggml_qnn_add(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
             goto failure;
         }

-        auto graph_item = std::make_tuple(graph_handle,
-                                          tensor_input0.get_qnn_tensor(),
-                                          tensor_input1.get_qnn_tensor(),
-                                          tensor_output.get_qnn_tensor());
+        auto graph_item = std::make_tuple(graph_handle, tensor_input0.get_qnn_tensor(), tensor_input1.get_qnn_tensor(),
+                                          tensor_output.get_qnn_tensor());
         instance->_qnn_graph_map[map_entry] = graph_item;
-    }
-    else {
-        auto& graph_item = instance->_qnn_graph_map[map_entry];
+    } else {
+        auto &graph_item = instance->_qnn_graph_map[map_entry];
         qnn::ggml_qnn_tensor_input tensor_input0(src0, std::get<1>(graph_item), ctx);
         qnn::ggml_qnn_tensor_input tensor_input1(src1, std::get<2>(graph_item), ctx);
         qnn::ggml_qnn_tensor_output tensor_output(dst, std::get<3>(graph_item), ctx);

         Qnn_Tensor_t tensor_inputs[] = { *tensor_input0.get_qnn_tensor(), *tensor_input1.get_qnn_tensor() };
         Qnn_Tensor_t tensor_outputs[] = { *tensor_output.get_qnn_tensor() };
-        error = qnn_raw_interface.graphExecute(graph_handle,
-                                               tensor_inputs, 2,
-                                               tensor_outputs, 1,
-                                               nullptr, nullptr);
+        error = qnn_raw_interface.graphExecute(graph_handle, tensor_inputs, 2, tensor_outputs, 1, nullptr, nullptr);
         if (ctx->device == QNN_BACKEND_NPU) {
             if (QNN_COMMON_ERROR_SYSTEM_COMMUNICATION == error) {
                 QNN_LOG_WARN("NPU crashed. SSR detected. Caused QNN graph execute error\n");
@@ -204,21 +179,18 @@ static void ggml_qnn_add(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,

 failure:
     if (QNN_SUCCESS != error) {
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64
-                      " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-                      src0->name, src0->type, ggml_type_name(src0->type),
-                      src0->ne[0], src0->ne[1], src0->ne[2], src0->nb[0],
-                      src0->nb[1], src0->nb[2]);
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64
-                      " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-                      src1->name, src1->type, ggml_type_name(src1->type),
-                      src1->ne[0], src1->ne[1], src1->ne[2], src1->nb[0],
-                      src1->nb[1], src1->nb[2]);
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64
-                      " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-                      dst->name, dst->type, ggml_type_name(dst->type),
-                      dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
-                      dst->nb[1], dst->nb[2]);
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      src0->name, src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
+                      src0->nb[0], src0->nb[1], src0->nb[2]);
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      src1->name, src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
+                      src1->nb[0], src1->nb[1], src1->nb[2]);
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
+                      dst->nb[1], dst->nb[2]);
     }

     perf.info();
@@ -235,16 +207,15 @@ failure:
  * mul_mat_f16_f32: src0 is F16 and src1 is F32.
  * mul_mat_q_f32: src0 is quantized (Q4_0, Q4_1, ...), and src1 is F32.
  */
-static void ggml_qnn_mul_mat(ggml_backend_qnn_context* ctx,
-                             const ggml_tensor* src0, const ggml_tensor* src1,
-                             ggml_tensor* dst) {
-    Qnn_ErrorHandle_t error = QNN_SUCCESS;
-    bool graph_initialized = false;
-    qnn::qnn_instance* instance = nullptr;
-    std::string graph_name = "ggml_op_qnn_mul_mat";
-    Qnn_GraphHandle_t graph_handle = nullptr;
-    Qnn_Param_t qnn_params[] = {};
-    enum ggml_op ggmlop = GGML_OP_MUL_MAT;
+static void ggml_qnn_mul_mat(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                             ggml_tensor *dst) {
+    Qnn_ErrorHandle_t error = QNN_SUCCESS;
+    bool graph_initialized = false;
+    qnn::qnn_instance *instance = nullptr;
+    std::string graph_name = "ggml_op_qnn_mul_mat";
+    Qnn_GraphHandle_t graph_handle = nullptr;
+    Qnn_Param_t qnn_params[] = {};
+    enum ggml_op ggmlop = GGML_OP_MUL_MAT;

     CHECK_PARAMS(ctx, src0, src1, dst);
     instance = ctx->instance;
@@ -254,21 +225,19 @@ static void ggml_qnn_mul_mat(ggml_backend_qnn_context* ctx,
     perf.start();

     std::string map_entry = std::string(ggml_op_name(ggmlop));
-    if (instance->_qnn_graph_map.find(map_entry) !=
-        instance->_qnn_graph_map.end()) {
+    if (instance->_qnn_graph_map.find(map_entry) != instance->_qnn_graph_map.end()) {
         graph_initialized = true;
-        auto& graph_item = instance->_qnn_graph_map[map_entry];
+        auto &graph_item = instance->_qnn_graph_map[map_entry];
         graph_handle = std::get<0>(graph_item);
     }

-    //TODO: for scenarios of quantized data in src0
-    //      pass-1: dequantize src0 to FP32
-    //      pass-2: dq-src0 * src1
-    //      the performance gains is worth although there is performance loss in pass-1
+    // TODO: for scenarios of quantized data in src0
+    //       pass-1: dequantize src0 to FP32
+    //       pass-2: dq-src0 * src1
+    //       the performance gains is worth although there is performance loss in pass-1

     if (!graph_initialized) {
-        graph_name = graph_name + "_" + std::to_string(ctx->threads) +
-                     "_" + src0->name + "_" + src1->name;
+        graph_name = graph_name + "_" + std::to_string(ctx->threads) + "_" + src0->name + "_" + src1->name;
         QNN_LOG_INFO("graph name %s", graph_name.c_str());
         if (ctx->device == QNN_BACKEND_NPU) {
             QnnHtpGraph_CustomConfig_t hvx_config;
@@ -288,7 +257,7 @@ static void ggml_qnn_mul_mat(ggml_backend_qnn_context* ctx,

             QnnHtpGraph_CustomConfig_t opt_config;
             opt_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG;
-            opt_config.optimizationOption.floatValue = 1; //1 / 3
+            opt_config.optimizationOption.floatValue = 1; // 1 / 3
             QnnGraph_Config_t graph_opt_config;
             graph_opt_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
             graph_opt_config.customConfig = &opt_config;
@@ -300,22 +269,17 @@ static void ggml_qnn_mul_mat(ggml_backend_qnn_context* ctx,
             graph_vtcm_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
             graph_vtcm_config.customConfig = &vtcm_config;

-            const QnnGraph_Config_t* p_graphconfig[] = { &graph_hvx_config,
-                                                         &graph_dlbc_config,
-                                                         &graph_vtcm_config,
-                                                         &graph_opt_config,
-                                                         NULL };
-            error = qnn_raw_interface.graphCreate(
-                instance->get_qnn_context_handle(), graph_name.c_str(), p_graphconfig,
-                &graph_handle);
-        }
-        else {
-            error = qnn_raw_interface.graphCreate(
-                instance->get_qnn_context_handle(), graph_name.c_str(), nullptr,
-                &graph_handle);
+            const QnnGraph_Config_t *p_graphconfig[] = { &graph_hvx_config, &graph_dlbc_config, &graph_vtcm_config,
+                                                         &graph_opt_config, NULL };
+            error = qnn_raw_interface.graphCreate(instance->get_qnn_context_handle(), graph_name.c_str(), p_graphconfig,
+                                                  &graph_handle);
+        } else {
+            error = qnn_raw_interface.graphCreate(instance->get_qnn_context_handle(), graph_name.c_str(), nullptr,
+                                                  &graph_handle);
         }
         if (QNN_SUCCESS != error) {
-            QNN_LOG_INFO("can't create qnn graph handle with graph name %s, "
+            QNN_LOG_INFO(
+                "can't create qnn graph handle with graph name %s, "
                 "error = %d\n",
                 graph_name.c_str(), error);
             goto failure;
@@ -334,32 +298,22 @@ static void ggml_qnn_mul_mat(ggml_backend_qnn_context* ctx,
             goto failure;
         }

-        Qnn_Tensor_t tensor_inputs[] = { *tensor_input0.get_qnn_tensor(), *tensor_input1.get_qnn_tensor() };
-        Qnn_Tensor_t tensor_outputs[] = { *tensor_output.get_qnn_tensor() };
-        Qnn_OpConfig_t op_config = {
-            (Qnn_OpConfigVersion_t)1,
-            .v1 = {"ggml_op_mul_mat",
-                   QNN_OP_PACKAGE_NAME_QTI_AISW,
-                   QNN_OP_MAT_MUL,
-                   0, qnn_params,
-                   2, tensor_inputs,
-                   1, tensor_outputs}
-        };
+        Qnn_Tensor_t tensor_inputs[] = { *tensor_input0.get_qnn_tensor(), *tensor_input1.get_qnn_tensor() };
+        Qnn_Tensor_t tensor_outputs[] = { *tensor_output.get_qnn_tensor() };
+        Qnn_OpConfig_t op_config = { (Qnn_OpConfigVersion_t)1,
+                                     .v1 = { "ggml_op_mul_mat", QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_MAT_MUL, 0,
+                                             qnn_params, 2, tensor_inputs, 1, tensor_outputs } };
         error = qnn_raw_interface.graphAddNode(graph_handle, op_config);
         if (QNN_SUCCESS != error) {
             QNN_LOG_INFO("error = %d\n", error);
             goto failure;
         }
-        error = qnn_raw_interface.graphFinalize(graph_handle,
-                                                nullptr, nullptr);
+        error = qnn_raw_interface.graphFinalize(graph_handle, nullptr, nullptr);
         if (QNN_SUCCESS != error) {
             QNN_LOG_INFO("error = %d\n", error);
             goto failure;
         }
-        error = qnn_raw_interface.graphExecute(graph_handle,
-                                               tensor_inputs, 2,
-                                               tensor_outputs, 1,
-                                               nullptr, nullptr);
+        error = qnn_raw_interface.graphExecute(graph_handle, tensor_inputs, 2, tensor_outputs, 1, nullptr, nullptr);
         if (ctx->device == QNN_BACKEND_NPU) {
             if (QNN_COMMON_ERROR_SYSTEM_COMMUNICATION == error) {
                 QNN_LOG_WARN("NPU crashed. SSR detected. Caused QNN graph execute error\n");
@@ -370,24 +324,18 @@ static void ggml_qnn_mul_mat(ggml_backend_qnn_context* ctx,
             goto failure;
         }

-        auto graph_item = std::make_tuple(graph_handle,
-                                          tensor_input0.get_qnn_tensor(),
-                                          tensor_input1.get_qnn_tensor(),
-                                          tensor_output.get_qnn_tensor());
+        auto graph_item = std::make_tuple(graph_handle, tensor_input0.get_qnn_tensor(), tensor_input1.get_qnn_tensor(),
+                                          tensor_output.get_qnn_tensor());
         instance->_qnn_graph_map[map_entry] = graph_item;
-    }
-    else {
-        auto& graph_item = instance->_qnn_graph_map[map_entry];
+    } else {
+        auto &graph_item = instance->_qnn_graph_map[map_entry];
         qnn::ggml_qnn_tensor_input tensor_input0(src0, std::get<1>(graph_item), ctx);
         qnn::ggml_qnn_tensor_input tensor_input1(src1, std::get<2>(graph_item), ctx);
         qnn::ggml_qnn_tensor_output tensor_output(dst, std::get<3>(graph_item), ctx);

         Qnn_Tensor_t tensor_inputs[] = { *tensor_input0.get_qnn_tensor(), *tensor_input1.get_qnn_tensor() };
         Qnn_Tensor_t tensor_outputs[] = { *tensor_output.get_qnn_tensor() };
-        error = qnn_raw_interface.graphExecute(graph_handle,
-                                               tensor_inputs, 2,
-                                               tensor_outputs, 1,
-                                               nullptr, nullptr);
+        error = qnn_raw_interface.graphExecute(graph_handle, tensor_inputs, 2, tensor_outputs, 1, nullptr, nullptr);
         if (ctx->device == QNN_BACKEND_NPU) {
             if (QNN_COMMON_ERROR_SYSTEM_COMMUNICATION == error) {
                 QNN_LOG_WARN("NPU crashed. SSR detected. Caused QNN graph execute error\n");
@@ -401,181 +349,127 @@ static void ggml_qnn_mul_mat(ggml_backend_qnn_context* ctx,

 failure:
     if (QNN_SUCCESS != error) {
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64
-                      " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-                      src0->name, src0->type, ggml_type_name(src0->type),
-                      src0->ne[0], src0->ne[1], src0->ne[2], src0->nb[0],
-                      src0->nb[1], src0->nb[2]);
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64
-                      " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-                      src1->name, src1->type, ggml_type_name(src1->type),
-                      src1->ne[0], src1->ne[1], src1->ne[2], src1->nb[0],
-                      src1->nb[1], src1->nb[2]);
-        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64
-                      " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
-                      dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0],
-                      dst->ne[1], dst->ne[2], dst->nb[0], dst->nb[1], dst->nb[2]);
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      src0->name, src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
+                      src0->nb[0], src0->nb[1], src0->nb[2]);
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      src1->name, src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
+                      src1->nb[0], src1->nb[1], src1->nb[2]);
+        QNN_LOG_DEBUG("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64
+                      ", nb = (%5zi, %5zi, %5zi)\n",
+                      dst->name, dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
+                      dst->nb[1], dst->nb[2]);
     }

     perf.info();
 }

-static void ggml_qnn_repeat(ggml_backend_qnn_context* ctx,
-                            const ggml_tensor* src0, const ggml_tensor* src1,
-                            ggml_tensor* dst) {
-}
+static void ggml_qnn_repeat(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                            ggml_tensor *dst) {}

-static void ggml_qnn_get_rows(ggml_backend_qnn_context* ctx,
-                              const ggml_tensor* src0, const ggml_tensor* src1,
-                              ggml_tensor* dst) {
-}
+static void ggml_qnn_get_rows(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                              ggml_tensor *dst) {}

-static void ggml_qnn_acc(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
-                         const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_acc(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                         ggml_tensor *dst) {}

-static void ggml_qnn_div(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
-                         const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_div(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                         ggml_tensor *dst) {}

-static void ggml_qnn_gelu(ggml_backend_qnn_context* ctx,
-                          const ggml_tensor* src0, const ggml_tensor* src1,
-                          ggml_tensor* dst) {
-}
+static void ggml_qnn_gelu(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                          ggml_tensor *dst) {}

-static void ggml_qnn_silu(ggml_backend_qnn_context* ctx,
-                          const ggml_tensor* src0, const ggml_tensor* src1,
-                          ggml_tensor* dst) {
-}
+static void ggml_qnn_silu(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                          ggml_tensor *dst) {}

-static void ggml_qnn_gelu_quick(ggml_backend_qnn_context* ctx,
-                                const ggml_tensor* src0,
-                                const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_gelu_quick(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                                ggml_tensor *dst) {}

-static void ggml_qnn_tanh(ggml_backend_qnn_context* ctx,
-                          const ggml_tensor* src0, const ggml_tensor* src1,
-                          ggml_tensor* dst) {
-}
+static void ggml_qnn_tanh(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                          ggml_tensor *dst) {}

-static void ggml_qnn_relu(ggml_backend_qnn_context* ctx,
-                          const ggml_tensor* src0, const ggml_tensor* src1,
-                          ggml_tensor* dst) {
-}
+static void ggml_qnn_relu(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                          ggml_tensor *dst) {}

-static void ggml_qnn_hardsigmoid(ggml_backend_qnn_context* ctx,
-                                 const ggml_tensor* src0,
-                                 const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_hardsigmoid(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                                 ggml_tensor *dst) {}

-static void ggml_qnn_hardswish(ggml_backend_qnn_context* ctx,
-                               const ggml_tensor* src0, const ggml_tensor* src1,
-                               ggml_tensor* dst) {
-}
+static void ggml_qnn_hardswish(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                               ggml_tensor *dst) {}

-static void ggml_qnn_leaky_relu(ggml_backend_qnn_context* ctx,
-                                const ggml_tensor* src0,
-                                const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_leaky_relu(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                                ggml_tensor *dst) {}

-static void ggml_qnn_sqr(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
-                         const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_sqr(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                         ggml_tensor *dst) {}

-static void ggml_qnn_norm(ggml_backend_qnn_context* ctx,
-                          const ggml_tensor* src0, const ggml_tensor* src1,
-                          ggml_tensor* dst) {
-}
+static void ggml_qnn_norm(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                          ggml_tensor *dst) {}

-static void ggml_qnn_group_norm(ggml_backend_qnn_context* ctx,
-                                const ggml_tensor* src0,
-                                const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_group_norm(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                                ggml_tensor *dst) {}

-static void ggml_qnn_concat(ggml_backend_qnn_context* ctx,
-                            const ggml_tensor* src0, const ggml_tensor* src1,
-                            ggml_tensor* dst) {
-}
+static void ggml_qnn_concat(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                            ggml_tensor *dst) {}

-static void ggml_qnn_upscale(ggml_backend_qnn_context* ctx,
-                             const ggml_tensor* src0, const ggml_tensor* src1,
-                             ggml_tensor* dst) {
-}
+static void ggml_qnn_upscale(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                             ggml_tensor *dst) {}

-static void ggml_qnn_pad(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
-                         const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_pad(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                         ggml_tensor *dst) {}

-static void ggml_qnn_rms_norm(ggml_backend_qnn_context* ctx,
-                              const ggml_tensor* src0, const ggml_tensor* src1,
-                              ggml_tensor* dst) {
-}
+static void ggml_qnn_rms_norm(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                              ggml_tensor *dst) {}

-static void ggml_qnn_cpy(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
-                         const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_cpy(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                         ggml_tensor *dst) {}

-static void ggml_qnn_dup(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
-                         const ggml_tensor* src1, ggml_tensor* dst) {
+static void ggml_qnn_dup(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                         ggml_tensor *dst) {
     ggml_qnn_cpy(ctx, src0, dst, nullptr);
     (void)src1;
 }

-static void ggml_qnn_mul_mat_id(ggml_backend_qnn_context* ctx,
-                                const ggml_tensor* src0,
-                                const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_mul_mat_id(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                                ggml_tensor *dst) {}

-static void ggml_qnn_scale(ggml_backend_qnn_context* ctx,
-                           const ggml_tensor* src0, const ggml_tensor* src1,
-                           ggml_tensor* dst) {
-}
+static void ggml_qnn_scale(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                           ggml_tensor *dst) {}

-static void ggml_qnn_clamp(ggml_backend_qnn_context* ctx,
-                           const ggml_tensor* src0, const ggml_tensor* src1,
-                           ggml_tensor* dst) {
-}
+static void ggml_qnn_clamp(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                           ggml_tensor *dst) {}

-static void ggml_qnn_diag_mask_inf(ggml_backend_qnn_context* ctx,
-                                   const ggml_tensor* src0,
-                                   const ggml_tensor* src1, ggml_tensor* dst) {
-}
+static void ggml_qnn_diag_mask_inf(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                                   ggml_tensor *dst) {}

-static void ggml_qnn_soft_max(ggml_backend_qnn_context* ctx,
-                              const ggml_tensor* src0, const ggml_tensor* src1,
-                              ggml_tensor* dst) {
-}
+static void ggml_qnn_soft_max(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                              ggml_tensor *dst) {}

-static void ggml_qnn_rope(ggml_backend_qnn_context* ctx,
-                          const ggml_tensor* src0, const ggml_tensor* src1,
-                          ggml_tensor* dst) {
+static void ggml_qnn_rope(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                          ggml_tensor *dst) {
     GGML_ASSERT(ggml_is_contiguous(src0));
 }

-static void ggml_qnn_pool2d(ggml_backend_qnn_context* ctx,
-                            const ggml_tensor* src0, const ggml_tensor* src1,
-                            ggml_tensor* dst) {
-}
+static void ggml_qnn_pool2d(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                            ggml_tensor *dst) {}

-static void ggml_qnn_im2col(ggml_backend_qnn_context* ctx,
-                            const ggml_tensor* src0, const ggml_tensor* src1,
-                            ggml_tensor* dst) {
-}
+static void ggml_qnn_im2col(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                            ggml_tensor *dst) {}

-static void ggml_qnn_sum_rows(ggml_backend_qnn_context* ctx,
-                              const ggml_tensor* src0, const ggml_tensor* src1,
-                              ggml_tensor* dst) {
+static void ggml_qnn_sum_rows(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                              ggml_tensor *dst) {
     GGML_ASSERT(ggml_is_contiguous(src0));
 }

-static void ggml_qnn_argsort(ggml_backend_qnn_context* ctx,
-                             const ggml_tensor* src0, const ggml_tensor* src1,
-                             ggml_tensor* dst) {
+static void ggml_qnn_argsort(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                             ggml_tensor *dst) {
     GGML_ASSERT(ggml_is_contiguous(src0));
 }

-static void ggml_qnn_nop(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,
-                         const ggml_tensor* src1, ggml_tensor* dst) {
+static void ggml_qnn_nop(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                         ggml_tensor *dst) {
     (void)src0;
     (void)src1;
     (void)dst;
@@ -583,33 +477,33 @@ static void ggml_qnn_nop(ggml_backend_qnn_context* ctx, const ggml_tensor* src0,

 qnn::ggml_qnn_op_array_t qnn::ggml_qnn_op_array() {
     static constexpr const qnn::ggml_qnn_op_t kQnnOpsTable[GGML_OP_COUNT] = {
-        nullptr, // GGML_OP_NONE
-        nullptr, // GGML_OP_DUP
+        nullptr,      // GGML_OP_NONE
+        nullptr,      // GGML_OP_DUP
         ggml_qnn_add, // GGML_OP_ADD
-        nullptr, // GGML_OP_ADD1
-        nullptr, // GGML_OP_ACC
-        nullptr, // GGML_OP_SUB
-        nullptr, // GGML_OP_MUL
-        nullptr, // GGML_OP_DIV
-        nullptr, // GGML_OP_SQR
-        nullptr, // GGML_OP_SQRT
-        nullptr, // GGML_OP_LOG
-        nullptr, // GGML_OP_SUM
-        nullptr, // GGML_OP_SUM_ROWS
-        nullptr, // GGML_OP_MEAN
-        nullptr, // GGML_OP_ARGMAX
-        nullptr, // GGML_OP_REPEAT
-        nullptr, // GGML_OP_REPEAT_BACK
-        nullptr, // GGML_OP_CONCAT
-        nullptr, // GGML_OP_SILU_BACK
-        nullptr, // GGML_OP_NORM
-        nullptr, // GGML_OP_RMS_NORM
-        nullptr, // GGML_OP_RMS_NORM_BACK
-        nullptr, // GGML_OP_GROUP_NORM
+        nullptr,      // GGML_OP_ADD1
+        nullptr,      // GGML_OP_ACC
+        nullptr,      // GGML_OP_SUB
+        nullptr,      // GGML_OP_MUL
+        nullptr,      // GGML_OP_DIV
+        nullptr,      // GGML_OP_SQR
+        nullptr,      // GGML_OP_SQRT
+        nullptr,      // GGML_OP_LOG
+        nullptr,      // GGML_OP_SUM
+        nullptr,      // GGML_OP_SUM_ROWS
+        nullptr,      // GGML_OP_MEAN
+        nullptr,      // GGML_OP_ARGMAX
+        nullptr,      // GGML_OP_REPEAT
+        nullptr,      // GGML_OP_REPEAT_BACK
+        nullptr,      // GGML_OP_CONCAT
+        nullptr,      // GGML_OP_SILU_BACK
+        nullptr,      // GGML_OP_NORM
+        nullptr,      // GGML_OP_RMS_NORM
+        nullptr,      // GGML_OP_RMS_NORM_BACK
+        nullptr,      // GGML_OP_GROUP_NORM

         ggml_qnn_mul_mat, // GGML_OP_MUL_MAT
-        nullptr, // GGML_OP_MUL_MAT_ID
-        nullptr, // GGML_OP_OUT_PROD
+        nullptr,          // GGML_OP_MUL_MAT_ID
+        nullptr,          // GGML_OP_OUT_PROD

         nullptr, // GGML_OP_SCALE
         nullptr, // GGML_OP_SET
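To see how this table is consumed (my commentary, with a hypothetical caller that is not part of the diff): the backend indexes kQnnOpsTable by the ggml op enum and invokes the entry when it is non-null, falling back to the default CPU path otherwise.

    // Hypothetical dispatch sketch built on the table above:
    auto op_func = qnn::ggml_qnn_op_array()[dst->op];
    if (op_func) {
        op_func(ctx, dst->src[0], dst->src[1], dst); // QNN path for supported ops
    } else {
        // unsupported op: leave it to the default ggml CPU implementation
    }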
@@ -1,17 +1,16 @@
 #pragma once

 #include "ggml.h"

 #include "backend.hpp"

 namespace qnn {

-typedef void (*ggml_qnn_op_t)(ggml_backend_qnn_context* ctx,
-                              const ggml_tensor* src0,
-                              const ggml_tensor* src1,
-                              ggml_tensor* dst);
+typedef void (*ggml_qnn_op_t)(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                              ggml_tensor *dst);

-typedef const ggml_qnn_op_t(&ggml_qnn_op_array_t)[GGML_OP_COUNT];
+typedef const ggml_qnn_op_t (&ggml_qnn_op_array_t)[GGML_OP_COUNT];

-ggml_qnn_op_array_t ggml_qnn_op_array();
+ggml_qnn_op_array_t ggml_qnn_op_array();

-}
+} // namespace qnn
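One detail in this header worth a remark (my commentary): ggml_qnn_op_array_t is a reference-to-array type, so the array bound GGML_OP_COUNT stays part of the function's return type rather than decaying to a bare pointer. A small illustrative use:

    // Illustrative: the reference-to-array return type keeps the bound visible.
    qnn::ggml_qnn_op_array_t ops = qnn::ggml_qnn_op_array();
    static_assert(sizeof(ops) / sizeof(ops[0]) == GGML_OP_COUNT, "table covers every ggml op");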
@@ -2,6 +2,7 @@
 #pragma once

 #include "ggml.h"
+
 #include "ggml-backend.h"

 #include "qnn.hpp"

@@ -11,8 +12,8 @@ struct ggml_backend_qnn_context {
     int threads;
     char name[GGML_MAX_NAME];
     char lib[GGML_MAX_NAME];
-    qnn::qnn_instance* instance;
-    ggml_backend* backend;
+    qnn::qnn_instance *instance;
+    ggml_backend *backend;
     QNN_INTERFACE_VER_TYPE raw_interface;
     QNN_SYSTEM_INTERFACE_VER_TYPE raw_system_interface;
     qnn::qcom_socinfo socinfo;
@@ -2,30 +2,26 @@
 #include "logger.hpp"

 #include <stdio.h>

 #include <mutex>

 #if (defined __ANDROID__) || (defined ANDROID)
 #include <android/log.h>
 #endif

-#define QNN_LOGBUF_LEN 4096
+#define QNN_LOGBUF_LEN 4096

-void qnn::internal_log(ggml_log_level level, const char* file,
-                       const char* func, int line,
-                       const char* format, ...) {
+void qnn::internal_log(ggml_log_level level, const char *file, const char *func, int line, const char *format, ...) {
     static std::mutex qnn_internal_log_mutex;
-    static char s_qnn_internal_log_buf[QNN_LOGBUF_LEN];
+    static char s_qnn_internal_log_buf[QNN_LOGBUF_LEN];

     {
         std::lock_guard<std::mutex> lock(qnn_internal_log_mutex);
-        va_list args;
+        va_list args;

         va_start(args, format);
-        int len_prefix =
-            snprintf(s_qnn_internal_log_buf, QNN_LOGBUF_LEN,
-                     "[%s, %d]: ", func, line);
-        int len = vsnprintf(s_qnn_internal_log_buf + len_prefix,
-                            QNN_LOGBUF_LEN - len_prefix, format, args);
+        int len_prefix = snprintf(s_qnn_internal_log_buf, QNN_LOGBUF_LEN, "[%s, %d]: ", func, line);
+        int len = vsnprintf(s_qnn_internal_log_buf + len_prefix, QNN_LOGBUF_LEN - len_prefix, format, args);
         if (len < (QNN_LOGBUF_LEN - len_prefix)) {
 #if (defined __ANDROID__) || (defined ANDROID)
             // for Android APK
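A note on the formatting idiom here (my commentary): the "[func, line]: " prefix is written with snprintf and the caller's format is appended with vsnprintf into the same static buffer, with the remaining capacity passed explicitly, so an oversized message is truncated instead of overflowing. A standalone sketch of the same bounded two-step pattern (hypothetical buffer size; needs <cstdio> and <cstdarg>):

    void log_with_prefix(const char *func, int line, const char *format, ...) {
        char buf[256];
        int len_prefix = snprintf(buf, sizeof(buf), "[%s, %d]: ", func, line);
        va_list args;
        va_start(args, format);
        vsnprintf(buf + len_prefix, sizeof(buf) - len_prefix, format, args); // bounded append
        va_end(args);
        // ... emit buf ...
    }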
@@ -38,32 +34,31 @@ void qnn::internal_log(ggml_log_level level, const char* file,
     }
 }

-void qnn::sdk_logcallback(const char* fmt, QnnLog_Level_t level,
-                          uint64_t timestamp, va_list argp) {
+void qnn::sdk_logcallback(const char *fmt, QnnLog_Level_t level, uint64_t timestamp, va_list argp) {
 #if ENABLE_QNNSDK_LOG
-    static std::mutex log_mutex;
+    static std::mutex log_mutex;
     static unsigned char s_ggml_qnn_logbuf[QNN_LOGBUF_LEN];

-    const char* log_level_desc = "";
+    const char *log_level_desc = "";
     switch (level) {
-    case QNN_LOG_LEVEL_ERROR:
-        log_level_desc = "ERROR";
-        break;
-    case QNN_LOG_LEVEL_WARN:
-        log_level_desc = "WARNING";
-        break;
-    case QNN_LOG_LEVEL_INFO:
-        log_level_desc = "INFO";
-        break;
-    case QNN_LOG_LEVEL_DEBUG:
-        log_level_desc = "DEBUG";
-        break;
-    case QNN_LOG_LEVEL_VERBOSE:
-        log_level_desc = "VERBOSE";
-        break;
-    case QNN_LOG_LEVEL_MAX:
-        log_level_desc = "UNKNOWN";
-        break;
+        case QNN_LOG_LEVEL_ERROR:
+            log_level_desc = "ERROR";
+            break;
+        case QNN_LOG_LEVEL_WARN:
+            log_level_desc = "WARNING";
+            break;
+        case QNN_LOG_LEVEL_INFO:
+            log_level_desc = "INFO";
+            break;
+        case QNN_LOG_LEVEL_DEBUG:
+            log_level_desc = "DEBUG";
+            break;
+        case QNN_LOG_LEVEL_VERBOSE:
+            log_level_desc = "VERBOSE";
+            break;
+        case QNN_LOG_LEVEL_MAX:
+            log_level_desc = "UNKNOWN";
+            break;
     }

     double ms = (double)timestamp / 1000000.0;

@@ -71,7 +66,7 @@ void qnn::sdk_logcallback(const char* fmt, QnnLog_Level_t level,
         std::lock_guard<std::mutex> lock(log_mutex);

         memset(s_ggml_qnn_logbuf, 0, QNN_LOGBUF_LEN);
-        vsnprintf(reinterpret_cast<char* const>(s_ggml_qnn_logbuf), QNN_LOGBUF_LEN, fmt, argp);
+        vsnprintf(reinterpret_cast<char *const>(s_ggml_qnn_logbuf), QNN_LOGBUF_LEN, fmt, argp);
         QNN_LOG_INFO("%8.1fms [%-7s] %s\n", ms, log_level_desc, s_ggml_qnn_logbuf);
     }
 #endif
@@ -2,48 +2,40 @@

 #include <stdint.h>

-#include "QnnTypes.h"
-#include "QnnCommon.h"
-#include "QnnInterface.h"
-#include "System/QnnSystemInterface.h"
-
 #include "ggml.h"

+#include "QnnCommon.h"
+#include "QnnInterface.h"
+#include "QnnTypes.h"
+#include "System/QnnSystemInterface.h"
+
 namespace qnn {
-void internal_log(ggml_log_level level, const char* file,
-                  const char* func, int line,
-                  const char* format, ...);
+void internal_log(ggml_log_level level, const char *file, const char *func, int line, const char *format, ...);

-void sdk_logcallback(const char* fmt, QnnLog_Level_t level,
-                     uint64_t timestamp, va_list argp);
-}
+void sdk_logcallback(const char *fmt, QnnLog_Level_t level, uint64_t timestamp, va_list argp);
+} // namespace qnn

 // =================================================================================================
 //
 //  QNN backend internal log function
 //
 // =================================================================================================
-#define QNN_LOG_ERROR(...) \
-    qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_ERROR(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

-#define QNN_LOG_WARN(...) \
-    qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_WARN(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

-#define QNN_LOG_INFO(...) \
-    qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_INFO(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

 #ifdef NDEBUG
-#define ENABLE_QNNBACKEND_DEBUG 0 // for troubleshooting QNN backend
-#define ENABLE_QNNSDK_LOG 0 // enable/disable QNN SDK's internal log
+#define ENABLE_QNNBACKEND_DEBUG 0 // for troubleshooting QNN backend
+#define ENABLE_QNNSDK_LOG 0       // enable/disable QNN SDK's internal log
 #else
-#define ENABLE_QNNBACKEND_DEBUG 1 // for troubleshooting QNN backend
-#define ENABLE_QNNSDK_LOG 1 // enable/disable QNN SDK's internal log
+#define ENABLE_QNNBACKEND_DEBUG 1 // for troubleshooting QNN backend
+#define ENABLE_QNNSDK_LOG 1       // enable/disable QNN SDK's internal log
 #endif

 #if ENABLE_QNNBACKEND_DEBUG
-#define QNN_LOG_DEBUG(...) \
-    qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define QNN_LOG_DEBUG(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
 #else
 #define QNN_LOG_DEBUG(...)
 #endif
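A note on the NDEBUG branch (my commentary): in release builds QNN_LOG_DEBUG(...) expands to nothing, so the entire call site, including any argument expressions, disappears and costs nothing at runtime:

    // Illustrative: this whole statement, including the ggml_type_name() call,
    // vanishes in release builds because QNN_LOG_DEBUG(...) expands to nothing.
    QNN_LOG_DEBUG("tensor %s has type %s", tensor->name, ggml_type_name(tensor->type));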
@@ -1,59 +1,55 @@

 #pragma once

-#include "QnnTypes.h"
 #include "QnnCommon.h"
 #include "QnnInterface.h"
+#include "QnnTypes.h"
 #include "Saver/QnnSaver.h"
 #include "System/QnnSystemInterface.h"

 namespace qnn {
-// =================================================================================================
-//
-//  helper data type / data structure / macros / functions of
-//  Qualcomm QNN(Qualcomm Neural Network, aka Qualcomm AI Engine Direct) SDK
-//  ref:https://github.com/pytorch/executorch/tree/main/backends/qualcomm
-// =================================================================================================
-enum sdk_profile_level {
-    profile_off = 0,
-    profile_basic = 1,
-    profile_detail = 2
-};
+// =================================================================================================
+//
+//  helper data type / data structure / macros / functions of
+//  Qualcomm QNN(Qualcomm Neural Network, aka Qualcomm AI Engine Direct) SDK
+//  ref:https://github.com/pytorch/executorch/tree/main/backends/qualcomm
+// =================================================================================================
+enum sdk_profile_level { profile_off = 0, profile_basic = 1, profile_detail = 2 };

-enum qcom_htp_arch {
-    NONE = 0,
-    V68 = 68,
-    V69 = 69,
-    V73 = 73,
-    V75 = 75,
-};
+enum qcom_htp_arch {
+    NONE = 0,
+    V68 = 68,
+    V69 = 69,
+    V73 = 73,
+    V75 = 75,
+};

-enum qcom_chipset {
-    UNKNOWN_SM = 0,
-    SM8450 = 36, // v69
-    SM8475 = 42, // v69
-    SM8550 = 43, // v73
-    SM8650 = 57, // v75
-};
+enum qcom_chipset {
+    UNKNOWN_SM = 0,
+    SM8450 = 36, // v69
+    SM8475 = 42, // v69
+    SM8550 = 43, // v73
+    SM8650 = 57, // v75
+};

-struct qcom_socinfo {
-    uint32_t soc_model;
-    size_t htp_arch;
-    size_t vtcm_size_in_mb;
-};
+struct qcom_socinfo {
+    uint32_t soc_model;
+    size_t htp_arch;
+    size_t vtcm_size_in_mb;
+};

-using pfn_rpc_mem_init = void (*)(void);
-using pfn_rpc_mem_deinit = void (*)(void);
-using pfn_rpc_mem_alloc = void* (*) (int, uint32_t, int);
-using pfn_rpc_mem_free = void (*)(void*);
-using pfn_rpc_mem_to_fd = int (*)(void*);
+using pfn_rpc_mem_init = void (*)(void);
+using pfn_rpc_mem_deinit = void (*)(void);
+using pfn_rpc_mem_alloc = void *(*)(int, uint32_t, int);
+using pfn_rpc_mem_free = void (*)(void *);
+using pfn_rpc_mem_to_fd = int (*)(void *);

-using pfn_qnnsaver_initialize = decltype(QnnSaver_initialize);
-using pfn_qnninterface_getproviders = decltype(QnnInterface_getProviders);
-using pfn_qnnsysteminterface_getproviders = decltype(QnnSystemInterface_getProviders);
-}
+using pfn_qnnsaver_initialize = decltype(QnnSaver_initialize);
+using pfn_qnninterface_getproviders = decltype(QnnInterface_getProviders);
+using pfn_qnnsysteminterface_getproviders = decltype(QnnSystemInterface_getProviders);
+} // namespace qnn

-#define QNN_VER_PTR(x) (&((x).v1)) // TODO: remove this macro after we have a separate header for QNN
+#define QNN_VER_PTR(x) (&((x).v1)) // TODO: remove this macro after we have a separate header for QNN

-#define RPCMEM_DEFAULT_FLAGS 1
-#define RPCMEM_HEAP_ID_SYSTEM 25
+#define RPCMEM_DEFAULT_FLAGS 1
+#define RPCMEM_HEAP_ID_SYSTEM 25
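The pfn_rpc_mem_* aliases above describe shared-memory entry points that the backend binds at runtime rather than linking against. A hedged sketch of how such typedefs are typically resolved with dlopen/dlsym (the library and symbol names here, libcdsprpc.so / rpcmem_alloc / rpcmem_free, are the conventional ones on Qualcomm Android devices and are illustrative; needs <dlfcn.h>):

    void *lib = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL);
    auto rpc_mem_alloc = reinterpret_cast<qnn::pfn_rpc_mem_alloc>(dlsym(lib, "rpcmem_alloc"));
    auto rpc_mem_free  = reinterpret_cast<qnn::pfn_rpc_mem_free>(dlsym(lib, "rpcmem_free"));
    // rpcmem_alloc(heap_id, flags, size) returns ION/DMA-BUF backed memory that the DSP can map.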
(File diff suppressed because it is too large.)
@ -1,146 +1,127 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "ggml-qnn.h"
|
||||
|
||||
#include "QnnTensor.h"
|
||||
#include "System/QnnSystemInterface.h"
|
||||
|
||||
#include "ggml-qnn.h"
|
||||
#include "backend.hpp"
|
||||
#include "qnn.hpp"
|
||||
|
||||
namespace qnn {
|
||||
|
||||
template <Qnn_TensorType_t _tensorType> class ggml_qnn_tensor_readwrite {
|
||||
public:
|
||||
ggml_qnn_tensor_readwrite(const ggml_tensor* tensor,
|
||||
Qnn_GraphHandle_t graph_handle,
|
||||
ggml_backend_qnn_context* ctx)
|
||||
: _tensor(tensor),
|
||||
_qnn_tensor(reinterpret_cast<Qnn_Tensor_t*>(tensor->extra)),
|
||||
_context(ctx) {
|
||||
_old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions;
|
||||
const auto qnn_data_type = datatype_from_ggml_datatype(tensor->type);
|
||||
const bool is_npu = ctx->device == QNN_BACKEND_NPU;
|
||||
QNN_VER_PTR(*_qnn_tensor)->type = _tensorType;
|
||||
if (is_npu) {
|
||||
QNN_VER_PTR(*_qnn_tensor)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
|
||||
QNN_VER_PTR(*_qnn_tensor)->clientBuf = { .data = nullptr, .dataSize = 0 };
|
||||
template <Qnn_TensorType_t _tensorType>
|
||||
class ggml_qnn_tensor_readwrite {
|
||||
public:
|
||||
explicit ggml_qnn_tensor_readwrite(const ggml_tensor *tensor, Qnn_GraphHandle_t graph_handle,
|
||||
ggml_backend_qnn_context *ctx) :
|
||||
_tensor(tensor), _qnn_tensor(reinterpret_cast<Qnn_Tensor_t *>(tensor->extra)), _context(ctx) {
|
||||
_old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions;
|
||||
const auto qnn_data_type = datatype_from_ggml_datatype(tensor->type);
|
||||
const bool is_npu = ctx->device == QNN_BACKEND_NPU;
|
||||
QNN_VER_PTR(*_qnn_tensor)->type = _tensorType;
|
||||
if (is_npu) {
|
||||
QNN_VER_PTR(*_qnn_tensor)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
|
||||
QNN_VER_PTR(*_qnn_tensor)->clientBuf = { .data = nullptr, .dataSize = 0 };
|
||||
}
|
||||
|
||||
auto err = ctx->raw_interface.tensorCreateGraphTensor(graph_handle, _qnn_tensor);
|
||||
if (err != QNN_SUCCESS) {
|
||||
QNN_LOG_INFO("error = %d\n", err);
|
||||
QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor, QNN_TENSOR_GET_NAME(*_qnn_tensor));
|
||||
_context = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
_dimensions[0] = (uint32_t)tensor->ne[0];
|
||||
_dimensions[1] = (uint32_t)tensor->ne[1];
|
||||
_dimensions[2] = (uint32_t)tensor->ne[2];
|
||||
_dimensions[3] = (uint32_t)tensor->ne[3];
|
||||
QNN_VER_PTR(*_qnn_tensor)->dimensions = _dimensions;
|
||||
QNN_VER_PTR(*_qnn_tensor)->rank = qnn::get_ggml_tensor_rank(tensor);
|
||||
QNN_VER_PTR(*_qnn_tensor)->dataType = qnn_data_type;
|
||||
|
||||
if (is_npu) {
|
||||
auto *instance = ctx->instance;
|
||||
uint8_t *qnn_buffer = static_cast<uint8_t *>(instance->alloc_rpcmem(ggml_nbytes(tensor), alignof(void *)));
|
||||
if (!qnn_buffer) {
|
||||
QNN_LOG_WARN("alloc rpcmem failure, %s\n", strerror(errno));
|
||||
QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor, QNN_TENSOR_GET_NAME(*_qnn_tensor));
|
||||
_context = nullptr;
|
||||
// No free for _qnn_tensor, because it's not registered.
|
||||
return;
|
||||
} else {
|
||||
QNN_LOG_INFO("alloc rpcmem successfully\n");
|
||||
}
|
||||
|
||||
auto err =
|
||||
ctx->raw_interface.tensorCreateGraphTensor(graph_handle, _qnn_tensor);
|
||||
if (err != QNN_SUCCESS) {
|
||||
QNN_LOG_INFO("error = %d\n", err);
|
||||
QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor,
|
||||
QNN_TENSOR_GET_NAME(*_qnn_tensor));
|
||||
instance->register_rpcmem(qnn_buffer, _qnn_tensor);
|
||||
if (_tensorType == QNN_TENSOR_TYPE_APP_WRITE || _tensorType == QNN_TENSOR_TYPE_APP_READWRITE) {
|
||||
memcpy(qnn_buffer, tensor->data, ggml_nbytes(tensor));
|
||||
}
|
||||
} else {
|
||||
QNN_VER_PTR(*_qnn_tensor)->clientBuf = { tensor->data, get_ggml_tensor_data_size(tensor) };
|
||||
}
|
||||
}
|
||||
|
||||
explicit ggml_qnn_tensor_readwrite(const ggml_tensor *tensor, Qnn_Tensor_t *qnn_tensor,
|
||||
ggml_backend_qnn_context *ctx) :
|
||||
_tensor(tensor), _qnn_tensor(qnn_tensor), _context(ctx) {
|
||||
_old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions;
|
||||
const auto qnn_data_type = qnn::datatype_from_ggml_datatype(tensor->type);
|
||||
const bool is_npu = ctx->device == QNN_BACKEND_NPU;
|
||||
|
||||
_dimensions[0] = (uint32_t)tensor->ne[0];
|
||||
_dimensions[1] = (uint32_t)tensor->ne[1];
|
||||
_dimensions[2] = (uint32_t)tensor->ne[2];
|
||||
_dimensions[3] = (uint32_t)tensor->ne[3];
|
||||
QNN_VER_PTR(*_qnn_tensor)->dimensions = _dimensions;
|
||||
QNN_VER_PTR(*_qnn_tensor)->rank = get_ggml_tensor_rank(tensor);
|
||||
QNN_VER_PTR(*_qnn_tensor)->dataType = qnn_data_type;
|
||||
|
||||
if (is_npu) {
|
||||
uint8_t *qnn_buffer =
|
||||
static_cast<uint8_t *>(ctx->instance->get_rpcmem_from_memhandle(QNN_VER_PTR(*_qnn_tensor)->memHandle));
|
||||
if (qnn_buffer) {
|
||||
memcpy(qnn_buffer, tensor->data, ggml_nbytes(tensor));
|
||||
} else {
|
||||
QNN_LOG_WARN("can't find rpcmem from qnn mem handle\n");
|
||||
QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor, QNN_TENSOR_GET_NAME(*_qnn_tensor));
|
||||
_context = nullptr;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
QNN_VER_PTR(*_qnn_tensor)->clientBuf = { tensor->data, get_ggml_tensor_data_size(tensor) };
|
||||
}
|
||||
}
|
||||
|
||||
_dimensions[0] = (uint32_t)tensor->ne[0];
|
||||
_dimensions[1] = (uint32_t)tensor->ne[1];
|
||||
_dimensions[2] = (uint32_t)tensor->ne[2];
|
||||
_dimensions[3] = (uint32_t)tensor->ne[3];
|
||||
QNN_VER_PTR(*_qnn_tensor)->dimensions = _dimensions;
|
||||
QNN_VER_PTR(*_qnn_tensor)->rank = qnn::get_ggml_tensor_rank(tensor);
|
||||
QNN_VER_PTR(*_qnn_tensor)->dataType = qnn_data_type;
|
||||
|
||||
if (is_npu) {
|
||||
auto* instance = ctx->instance;
|
||||
uint8_t* qnn_buffer = static_cast<uint8_t*>(
|
||||
instance->alloc_rpcmem(ggml_nbytes(tensor), alignof(void*)));
|
||||
if (!qnn_buffer) {
|
||||
QNN_LOG_WARN("alloc rpcmem failure, %s\n", strerror(errno));
|
||||
QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor,
|
||||
QNN_TENSOR_GET_NAME(*_qnn_tensor));
|
||||
_context = nullptr;
|
||||
// No free for _qnn_tensor, because it's not registered.
|
||||
return;
|
||||
}
|
||||
else {
|
||||
QNN_LOG_INFO("alloc rpcmem successfully\n");
|
||||
}
|
||||
|
||||
instance->register_rpcmem(qnn_buffer, _qnn_tensor);
|
||||
if (_tensorType == QNN_TENSOR_TYPE_APP_WRITE ||
|
||||
_tensorType == QNN_TENSOR_TYPE_APP_READWRITE) {
|
||||
memcpy(qnn_buffer, tensor->data, ggml_nbytes(tensor));
|
||||
}
|
||||
}
|
||||
else {
|
||||
QNN_VER_PTR(*_qnn_tensor)->clientBuf = {
|
||||
tensor->data, get_ggml_tensor_data_size(tensor) };
|
||||
}
|
||||
    ~ggml_qnn_tensor_readwrite() {
        if ((_tensorType == QNN_TENSOR_TYPE_APP_READWRITE || _tensorType == QNN_TENSOR_TYPE_APP_READ) && _context &&
            _context->device == QNN_BACKEND_NPU) {
            uint8_t *qnn_buffer = static_cast<uint8_t *>(
                _context->instance->get_rpcmem_from_memhandle(QNN_VER_PTR(*_qnn_tensor)->memHandle));
            memcpy(_tensor->data, qnn_buffer, ggml_nbytes(_tensor));
        }

    ggml_qnn_tensor_readwrite(const ggml_tensor* tensor, Qnn_Tensor_t* qnn_tensor,
                              ggml_backend_qnn_context* ctx)
        : _tensor(tensor), _qnn_tensor(qnn_tensor), _context(ctx) {
        _old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions;
        const auto qnn_data_type = qnn::datatype_from_ggml_datatype(tensor->type);
        const bool is_npu = ctx->device == QNN_BACKEND_NPU;
        QNN_VER_PTR(*_qnn_tensor)->dimensions = _old_dimensions;
    }

        _dimensions[0] = (uint32_t)tensor->ne[0];
        _dimensions[1] = (uint32_t)tensor->ne[1];
        _dimensions[2] = (uint32_t)tensor->ne[2];
        _dimensions[3] = (uint32_t)tensor->ne[3];
        QNN_VER_PTR(*_qnn_tensor)->dimensions = _dimensions;
        QNN_VER_PTR(*_qnn_tensor)->rank = get_ggml_tensor_rank(tensor);
        QNN_VER_PTR(*_qnn_tensor)->dataType = qnn_data_type;
    bool is_valid() const { return _context; }
    Qnn_Tensor_t *get_qnn_tensor() const { return _qnn_tensor; }

        if (is_npu) {
            uint8_t* qnn_buffer =
                static_cast<uint8_t*>(ctx->instance->get_rpcmem_from_memhandle(
                    QNN_VER_PTR(*_qnn_tensor)->memHandle));
            if (qnn_buffer) {
                memcpy(qnn_buffer, tensor->data, ggml_nbytes(tensor));
            }
            else {
                QNN_LOG_WARN("can't find rpcmem from qnn mem handle\n");
                QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor,
                              QNN_TENSOR_GET_NAME(*_qnn_tensor));
                _context = nullptr;
                return;
            }
        }
        else {
            QNN_VER_PTR(*_qnn_tensor)->clientBuf = {
                tensor->data, get_ggml_tensor_data_size(tensor) };
        }
    }

private:
    const ggml_tensor *_tensor;
    Qnn_Tensor_t *_qnn_tensor;
    ggml_backend_qnn_context *_context;
    uint32_t *_old_dimensions;
    uint32_t _dimensions[4] = {};

    ~ggml_qnn_tensor_readwrite() {
        if ((_tensorType == QNN_TENSOR_TYPE_APP_READWRITE ||
             _tensorType == QNN_TENSOR_TYPE_APP_READ) &&
            _context && _context->device == QNN_BACKEND_NPU) {
            uint8_t* qnn_buffer =
                static_cast<uint8_t*>(_context->instance->get_rpcmem_from_memhandle(
                    QNN_VER_PTR(*_qnn_tensor)->memHandle));
            memcpy(_tensor->data, qnn_buffer, ggml_nbytes(_tensor));
        }
    ggml_qnn_tensor_readwrite(const ggml_qnn_tensor_readwrite &) = delete;
    void operator=(const ggml_qnn_tensor_readwrite &) = delete;
    ggml_qnn_tensor_readwrite(ggml_qnn_tensor_readwrite &&) = delete;
    void operator=(ggml_qnn_tensor_readwrite &&) = delete;
};

        QNN_VER_PTR(*_qnn_tensor)->dimensions = _old_dimensions;
    }

    bool is_valid() const { return _context; }
    Qnn_Tensor_t* get_qnn_tensor() const { return _qnn_tensor; }

private:
    const ggml_tensor* _tensor;
    Qnn_Tensor_t* _qnn_tensor;
    ggml_backend_qnn_context* _context;
    uint32_t* _old_dimensions;
    uint32_t _dimensions[4] = {};

    ggml_qnn_tensor_readwrite(const ggml_qnn_tensor_readwrite&) = delete;
    void operator=(const ggml_qnn_tensor_readwrite&) = delete;
    ggml_qnn_tensor_readwrite(ggml_qnn_tensor_readwrite&&) = delete;
    void operator=(ggml_qnn_tensor_readwrite&&) = delete;
};

using ggml_qnn_tensor_output =
    ggml_qnn_tensor_readwrite<QNN_TENSOR_TYPE_APP_READ>;
using ggml_qnn_tensor_input =
    ggml_qnn_tensor_readwrite<QNN_TENSOR_TYPE_APP_WRITE>;
using ggml_qnn_tensor_output = ggml_qnn_tensor_readwrite<QNN_TENSOR_TYPE_APP_READ>;
using ggml_qnn_tensor_input = ggml_qnn_tensor_readwrite<QNN_TENSOR_TYPE_APP_WRITE>;

} // namespace qnn

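Editor's note: for orientation, here is a minimal sketch of how these RAII wrappers are meant to be used from an op implementation. It is illustrative only and not part of this commit; the surrounding function and the ctx/src/dst/Qnn_Tensor_t variables are assumed to exist at the call site. The input wrapper uploads host data on construction, and the output wrapper copies NPU results back in its destructor.

// Hedged sketch, not from the diff: names prefixed "example_" are assumptions.
static void example_op(ggml_backend_qnn_context *ctx, const ggml_tensor *src, ggml_tensor *dst,
                       Qnn_Tensor_t *qnn_src, Qnn_Tensor_t *qnn_dst) {
    qnn::ggml_qnn_tensor_input input(src, qnn_src, ctx);   // uploads src->data (rpcmem or clientBuf)
    if (!input.is_valid()) {
        return; // rpcmem allocation or registration failed; _context was cleared
    }
    qnn::ggml_qnn_tensor_output output(dst, qnn_dst, ctx);
    if (!output.is_valid()) {
        return;
    }
    // ... execute the QNN graph with input.get_qnn_tensor() and output.get_qnn_tensor() ...
}   // on NPU, output's destructor memcpys the result back into dst->data
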
@ -2,14 +2,15 @@
#include "utils.hpp"

#include "ggml-qnn.h"

#include "qnn-types.hpp"

namespace qnn {

    // TODO: mapping more ggml data type to QNN data type
    // ref:explanation of k-quants, https://github.com/ggerganov/llama.cpp/pull/1684
    Qnn_DataType_t datatype_from_ggml_datatype(enum ggml_type ggmltype) {
        switch (ggmltype) {
// TODO: mapping more ggml data type to QNN data type
// ref:explanation of k-quants, https://github.com/ggerganov/llama.cpp/pull/1684
Qnn_DataType_t datatype_from_ggml_datatype(enum ggml_type ggmltype) {
    switch (ggmltype) {
    case GGML_TYPE_F16:
        return QNN_DATATYPE_FLOAT_16;
    case GGML_TYPE_F32:
@ -22,24 +23,22 @@ namespace qnn {
        return QNN_DATATYPE_SFIXED_POINT_4;
    default:
        break;
    }
    return QNN_DATATYPE_UNDEFINED;
}
        return QNN_DATATYPE_UNDEFINED;
    }


uint32_t get_ggml_tensor_rank(const ggml_tensor* tensor) {
    uint32_t rank = 0;
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) {
            rank++;
        }
uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor) {
    uint32_t rank = 0;
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) {
            rank++;
        }
    }
    return rank;
}
    return rank;
}


const char* get_backend_name(int n_backend_type) {
    switch (n_backend_type) {
const char *get_backend_name(int n_backend_type) {
    switch (n_backend_type) {
    case QNN_BACKEND_CPU:
        return "QNN-CPU";
    case QNN_BACKEND_GPU:
@ -50,11 +49,11 @@ namespace qnn {
        return "ggml"; //"fake" QNN backend, used for compare performance between QNN backend and original GGML
    default:
        return "unknown";
    }
}
    }

const char* get_chipset_desc(uint32_t chipset_id) {
    switch (chipset_id) {
const char *get_chipset_desc(uint32_t chipset_id) {
    switch (chipset_id) {
    case SM8450:
        return "SM8450";
    case SM8475:
@ -65,11 +64,11 @@ namespace qnn {
        return "SM8650";
    default:
        return "unknown";
    }
}
    }

const char* get_htparch_desc(size_t htp_arch) {
    switch (htp_arch) {
const char *get_htparch_desc(size_t htp_arch) {
    switch (htp_arch) {
    case V68:
        return "QCOM_HTP_V68";
    case V69:
@ -80,37 +79,36 @@ namespace qnn {
        return "QCOM_HTP_V75";
    default:
        return "unknown";
    }
}
    }

intptr_t align_to(size_t alignment, intptr_t offset) {
    return offset % alignment == 0
               ? offset
               : offset + (static_cast<intptr_t>(alignment) - offset % static_cast<intptr_t>(alignment));
}

uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor) {
    /*
    size_t data_size = ggml_row_size(tensor->type, tensor->ne[0]);
    size_t n_dims = qnn_get_ggml_tensor_rank(tensor);
    for (int i = 1; i < n_dims; i++) {
        data_size *= tensor->ne[i];
    }

    intptr_t align_to(size_t alignment, intptr_t offset) {
        return offset % alignment == 0
                   ? offset
                   : offset + (static_cast<intptr_t>(alignment) -
                               offset % static_cast<intptr_t>(alignment));
    }
    return data_size;
    */
    return ggml_nbytes(tensor);
}

    uint32_t get_ggml_tensor_data_size(const ggml_tensor* tensor) {
        /*
        size_t data_size = ggml_row_size(tensor->type, tensor->ne[0]);
        size_t n_dims = qnn_get_ggml_tensor_rank(tensor);
        for (int i = 1; i < n_dims; i++) {
            data_size *= tensor->ne[i];
        }

        return data_size;
        */
        return ggml_nbytes(tensor);
    }

// =================================================================================================
//
// QNN backend internal helper functions
//
// =================================================================================================
// TODO: only support GGML_OP_ADD/GGML_OP_MUL/GGML_OP_MUL_MAT
const char* opname_from_ggmlop(enum ggml_op ggmlop) {
    switch (ggmlop) {
// =================================================================================================
//
// QNN backend internal helper functions
//
// =================================================================================================
// TODO: only support GGML_OP_ADD/GGML_OP_MUL/GGML_OP_MUL_MAT
const char *opname_from_ggmlop(enum ggml_op ggmlop) {
    switch (ggmlop) {
    case GGML_OP_ADD:
        return QNN_OP_ELEMENT_WISE_ADD;
    case GGML_OP_MUL:

@ -119,8 +117,8 @@ namespace qnn {
        return QNN_OP_MAT_MUL;
    default:
        break;
    }
    return nullptr;
}

    return nullptr;
    }

} // namespace qnn

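Editor's note: a small self-check of the helpers above (illustrative, not part of the commit). get_ggml_tensor_rank counts only dimensions with extent greater than 1, align_to rounds an offset up to the next multiple of the alignment, and get_ggml_tensor_data_size currently defers to ggml_nbytes.

// Hedged sketch exercising the utils; the numeric values are worked examples.
static void example_utils_check(const ggml_tensor *t) {
    // A tensor with ne = {8, 4, 1, 1} yields rank 2: extents of 0 or 1 are skipped.
    GGML_ASSERT(qnn::get_ggml_tensor_rank(t) <= GGML_MAX_DIMS);
    // 100 % 32 == 4, so align_to(32, 100) == 100 + (32 - 4) == 128.
    GGML_ASSERT(qnn::align_to(32, 100) == 128);
    // Offsets already on the boundary are returned unchanged.
    GGML_ASSERT(qnn::align_to(32, 128) == 128);
    // Data size is just ggml's own byte accounting for now.
    GGML_ASSERT(qnn::get_ggml_tensor_data_size(t) == ggml_nbytes(t));
}
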
@ -1,246 +1,239 @@
#pragma once

#include <stdint.h>
#include <stddef.h>
#include <inttypes.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <string>
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>

#include "QnnTypes.h"
#include <string>

#include "ggml.h"

#include "QnnTypes.h"
#include "logger.hpp"

namespace qnn {

Qnn_DataType_t datatype_from_ggml_datatype(enum ggml_type ggmltype);
uint32_t get_ggml_tensor_rank(const ggml_tensor* tensor);
const char* get_backend_name(int n_backend_type);
const char* get_chipset_desc(uint32_t chipset_id);
const char* get_htparch_desc(size_t htp_arch);
intptr_t align_to(size_t alignment, intptr_t offset);
uint32_t get_ggml_tensor_data_size(const ggml_tensor* tensor);
Qnn_DataType_t datatype_from_ggml_datatype(enum ggml_type ggmltype);
uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor);
const char *get_backend_name(int n_backend_type);
const char *get_chipset_desc(uint32_t chipset_id);
const char *get_htparch_desc(size_t htp_arch);
intptr_t align_to(size_t alignment, intptr_t offset);
uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor);

const char* opname_from_ggmlop(enum ggml_op ggmlop);

template <typename Fn> Fn load_qnn_functionpointers(void* handle, const char* function_name) {
    return reinterpret_cast<Fn>(dlsym(handle, function_name));
}

inline int validate_tensor_version(Qnn_Tensor_t tensor) {
    if (tensor.version != QNN_TENSOR_VERSION_1) {
        QNN_LOG_WARN(
            "validate_tensor_version() tensor %s, got unsupported version %d\n",
            tensor.v1.name, tensor.version);
        return 1;
    }
    return 0;
}

inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t& tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.id;
    }

    return 0u;
}

inline const char* get_qnn_tensorname(const Qnn_Tensor_t& tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.name;
    }
    return nullptr;
}

inline Qnn_TensorType_t get_qnn_tensortype(const Qnn_Tensor_t& tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.type;
    }
    return QNN_TENSOR_TYPE_UNDEFINED;
}

inline Qnn_TensorDataFormat_t
get_qnn_tensor_dataformat(const Qnn_Tensor_t& tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.dataFormat;
    }
    return QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
}

inline Qnn_DataType_t
get_qnn_tensor_datatype(const Qnn_Tensor_t& tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.dataType;
    }
    return QNN_DATATYPE_UNDEFINED;
}

inline Qnn_QuantizeParams_t
get_qnn_tensor_quantparams(const Qnn_Tensor_t& tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.quantizeParams;
    }
    return QNN_QUANTIZE_PARAMS_INIT;
}

inline uint32_t get_qnn_tensor_rank(const Qnn_Tensor_t& tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.rank;
    }
    return 0u;
}

inline uint32_t* get_qnn_tensor_dimensions(const Qnn_Tensor_t& tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.dimensions;
    }
    return nullptr;
}

inline Qnn_TensorMemType_t get_qnn_tensor_memtype(const Qnn_Tensor_t& tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.memType;
    }
    return QNN_TENSORMEMTYPE_UNDEFINED;
}

inline void set_qnn_tensor_id(Qnn_Tensor_t& tensor, uint32_t id) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.id = id;
    }
}

inline void set_qnn_tensor_name(Qnn_Tensor_t& tensor, const char* name) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.name = name;
    }
}

inline void set_qnn_tensor_type(Qnn_Tensor_t& tensor, Qnn_TensorType_t type) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.type = type;
    }
}

inline void set_qnn_tensor_dataformat(Qnn_Tensor_t& tensor, Qnn_TensorDataFormat_t format) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.dataFormat = format;
    }
}

inline void set_qnn_tensor_datatype(Qnn_Tensor_t& tensor, Qnn_DataType_t dataType) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.dataType = dataType;
    }
}

inline void set_qnn_tensor_quantparams(Qnn_Tensor_t& tensor, Qnn_QuantizeParams_t params) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.quantizeParams = params;
    }
}

inline void set_qnn_tensor_rank(Qnn_Tensor_t& tensor, uint32_t rank) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.rank = rank;
    }
}

inline void set_qnn_tensor_dimensions(Qnn_Tensor_t& tensor, uint32_t* dims) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.dimensions = dims;
    }
}

inline void set_qnn_tensor_memtype(Qnn_Tensor_t& tensor, Qnn_TensorMemType_t mem_type) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.memType = mem_type;
    }
}

inline void set_qnn_tensor_clientbuf(Qnn_Tensor_t& tensor, Qnn_ClientBuffer_t client_buf) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.clientBuf = client_buf;
    }
}

inline void set_qnn_tensor_memhandle(Qnn_Tensor_t& tensor, Qnn_MemHandle_t handle) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.memHandle = handle;
    }
}


#if ENABLE_QNNBACKEND_PERF
class qnn_perf {
public:
    qnn_perf(const std::string& perf_name) : _perf_name(std::move(perf_name)) {};
    qnn_perf() = delete;
    qnn_perf(const qnn_perf&) = delete;
    qnn_perf& operator= (const qnn_perf&) = delete;

    void start() {
        _begin_time = ggml_time_us();
    }

    void info() {
        _end_time = ggml_time_us();
        _duration = (_end_time - _begin_time);
        QNN_LOG_INFO("duration of %s : %lld microseconds\n", _perf_name.c_str(), _duration);
    }

private:
    int64_t _begin_time = 0LL;
    int64_t _end_time = 0LL;
    int64_t _duration = 0LL;
    std::string _perf_name;
};
#else
class qnn_perf {
public:
    qnn_perf(const std::string& perf_name) {}
    qnn_perf() = delete;
    qnn_perf(const qnn_perf&) = delete;
    qnn_perf& operator= (const qnn_perf&) = delete;

    void start() {}
    void info() {}
};
#endif
const char *opname_from_ggmlop(enum ggml_op ggmlop);

template <typename Fn>
Fn load_qnn_functionpointers(void *handle, const char *function_name) {
    return reinterpret_cast<Fn>(dlsym(handle, function_name));
}
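Editor's note: load_qnn_functionpointers is a thin typed wrapper over dlsym. The sketch below is illustrative and not part of the commit; the function-pointer typedef is an assumption, while QnnInterface_getProviders is the entry point QNN backend libraries actually export.

// Hedged sketch: resolve a backend entry point from a dynamically loaded
// QNN library; gives up quietly when the library or symbol is missing.
typedef int (*example_get_providers_fn)(const void ***provider_list, uint32_t *num_providers);

static void example_load_backend(void) {
    void *lib = dlopen("libQnnCpu.so", RTLD_NOW | RTLD_LOCAL);
    if (!lib) {
        return;
    }
    auto get_providers = qnn::load_qnn_functionpointers<example_get_providers_fn>(
        lib, "QnnInterface_getProviders"); // nullptr if the symbol is absent
    if (!get_providers) {
        dlclose(lib);
    }
}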

inline int validate_tensor_version(Qnn_Tensor_t tensor) {
    if (tensor.version != QNN_TENSOR_VERSION_1) {
        QNN_LOG_WARN("validate_tensor_version() tensor %s, got unsupported version %d\n", tensor.v1.name,
                     tensor.version);
        return 1;
    }
    return 0;
}

#define VALIDATE(value, status) \
    do { \
        status = value; \
        if (status != QNN_SUCCESS) { \
            QNN_LOG_WARN("%s expected QNN_SUCCESS\n", #value); \
            return status; \
        } \
inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t &tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.id;
    }

    return 0u;
}

inline const char *get_qnn_tensorname(const Qnn_Tensor_t &tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.name;
    }
    return nullptr;
}

inline Qnn_TensorType_t get_qnn_tensortype(const Qnn_Tensor_t &tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.type;
    }
    return QNN_TENSOR_TYPE_UNDEFINED;
}

inline Qnn_TensorDataFormat_t get_qnn_tensor_dataformat(const Qnn_Tensor_t &tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.dataFormat;
    }
    return QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
}

inline Qnn_DataType_t get_qnn_tensor_datatype(const Qnn_Tensor_t &tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.dataType;
    }
    return QNN_DATATYPE_UNDEFINED;
}

inline Qnn_QuantizeParams_t get_qnn_tensor_quantparams(const Qnn_Tensor_t &tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.quantizeParams;
    }
    return QNN_QUANTIZE_PARAMS_INIT;
}

inline uint32_t get_qnn_tensor_rank(const Qnn_Tensor_t &tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.rank;
    }
    return 0u;
}

inline uint32_t *get_qnn_tensor_dimensions(const Qnn_Tensor_t &tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.dimensions;
    }
    return nullptr;
}

inline Qnn_TensorMemType_t get_qnn_tensor_memtype(const Qnn_Tensor_t &tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.memType;
    }
    return QNN_TENSORMEMTYPE_UNDEFINED;
}

inline void set_qnn_tensor_id(Qnn_Tensor_t &tensor, uint32_t id) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.id = id;
    }
}

inline void set_qnn_tensor_name(Qnn_Tensor_t &tensor, const char *name) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.name = name;
    }
}

inline void set_qnn_tensor_type(Qnn_Tensor_t &tensor, Qnn_TensorType_t type) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.type = type;
    }
}

inline void set_qnn_tensor_dataformat(Qnn_Tensor_t &tensor, Qnn_TensorDataFormat_t format) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.dataFormat = format;
    }
}

inline void set_qnn_tensor_datatype(Qnn_Tensor_t &tensor, Qnn_DataType_t dataType) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.dataType = dataType;
    }
}

inline void set_qnn_tensor_quantparams(Qnn_Tensor_t &tensor, Qnn_QuantizeParams_t params) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.quantizeParams = params;
    }
}

inline void set_qnn_tensor_rank(Qnn_Tensor_t &tensor, uint32_t rank) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.rank = rank;
    }
}

inline void set_qnn_tensor_dimensions(Qnn_Tensor_t &tensor, uint32_t *dims) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.dimensions = dims;
    }
}

inline void set_qnn_tensor_memtype(Qnn_Tensor_t &tensor, Qnn_TensorMemType_t mem_type) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.memType = mem_type;
    }
}

inline void set_qnn_tensor_clientbuf(Qnn_Tensor_t &tensor, Qnn_ClientBuffer_t client_buf) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.clientBuf = client_buf;
    }
}

inline void set_qnn_tensor_memhandle(Qnn_Tensor_t &tensor, Qnn_MemHandle_t handle) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        tensor.v1.memHandle = handle;
    }
}

#if ENABLE_QNNBACKEND_PERF
class qnn_perf {
public:
    qnn_perf(const std::string &perf_name) : _perf_name(std::move(perf_name)) {};
    qnn_perf() = delete;
    qnn_perf(const qnn_perf &) = delete;
    qnn_perf &operator=(const qnn_perf &) = delete;

    void start() { _begin_time = ggml_time_us(); }

    void info() {
        _end_time = ggml_time_us();
        _duration = (_end_time - _begin_time);
        QNN_LOG_INFO("duration of %s : %lld microseconds\n", _perf_name.c_str(), _duration);
    }

private:
    int64_t _begin_time = 0LL;
    int64_t _end_time = 0LL;
    int64_t _duration = 0LL;
    std::string _perf_name;
};
#else
class qnn_perf {
public:
    qnn_perf(const std::string &perf_name) {}
    qnn_perf() = delete;
    qnn_perf(const qnn_perf &) = delete;
    qnn_perf &operator=(const qnn_perf &) = delete;

    void start() {}
    void info() {}
};
#endif
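Editor's note: call sites look the same under either branch of ENABLE_QNNBACKEND_PERF; the stub keeps the probes in place at zero cost. A minimal illustrative sketch (not from the diff):

// Hedged sketch: time one section; with perf disabled both calls are no-ops.
static void example_timed_section(void) {
    qnn::qnn_perf perf("example_section"); // label is illustrative
    perf.start();
    // ... work to measure ...
    perf.info(); // logs "duration of example_section : N microseconds"
}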

} // namespace qnn

#define VALIDATE(value, status) \
    do { \
        status = value; \
        if (status != QNN_SUCCESS) { \
            QNN_LOG_WARN("%s expected QNN_SUCCESS\n", #value); \
            return status; \
        } \
    } while (0)
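Editor's note: because the macro body ends in a `return status;`, VALIDATE only fits inside functions whose return type accepts the status, and the caller must declare the variable the macro assigns into. An illustrative sketch of a call site (not from the diff; the graphFinalize call stands in for any QNN API returning Qnn_ErrorHandle_t):

// Hedged sketch: early-return on the first failing QNN call.
static Qnn_ErrorHandle_t example_finalize(QNN_INTERFACE_VER_TYPE &api, Qnn_GraphHandle_t graph) {
    Qnn_ErrorHandle_t error = QNN_SUCCESS;
    VALIDATE(api.graphFinalize(graph, nullptr, nullptr), error); // returns early on failure
    return error;
}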

#define QNN_TENSOR_GET_ID(tensor)           qnn::get_qnn_tensorid(tensor)
#define QNN_TENSOR_GET_NAME(tensor)         qnn::get_qnn_tensorname(tensor)
#define QNN_TENSOR_GET_TYPE(tensor)         qnn::get_qnn_tensortype(tensor)
#define QNN_TENSOR_GET_DATA_FORMAT(tensor)  qnn::get_qnn_tensor_dataformat(tensor)
#define QNN_TENSOR_GET_DATA_TYPE(tensor)    qnn::get_qnn_tensor_datatype(tensor)
#define QNN_TENSOR_GET_QUANT_PARAMS(tensor) qnn::get_qnn_tensor_quantparams(tensor)
#define QNN_TENSOR_GET_RANK(tensor)         qnn::get_qnn_tensor_rank(tensor)
#define QNN_TENSOR_GET_DIMENSIONS(tensor)   qnn::get_qnn_tensor_dimensions(tensor)
#define QNN_TENSOR_GET_MEM_TYPE(tensor)     qnn::get_qnn_tensor_memtype(tensor)
#define QNN_TENSOR_GET_ID(tensor)           qnn::get_qnn_tensorid(tensor)
#define QNN_TENSOR_GET_NAME(tensor)         qnn::get_qnn_tensorname(tensor)
#define QNN_TENSOR_GET_TYPE(tensor)         qnn::get_qnn_tensortype(tensor)
#define QNN_TENSOR_GET_DATA_FORMAT(tensor)  qnn::get_qnn_tensor_dataformat(tensor)
#define QNN_TENSOR_GET_DATA_TYPE(tensor)    qnn::get_qnn_tensor_datatype(tensor)
#define QNN_TENSOR_GET_QUANT_PARAMS(tensor) qnn::get_qnn_tensor_quantparams(tensor)
#define QNN_TENSOR_GET_RANK(tensor)         qnn::get_qnn_tensor_rank(tensor)
#define QNN_TENSOR_GET_DIMENSIONS(tensor)   qnn::get_qnn_tensor_dimensions(tensor)
#define QNN_TENSOR_GET_MEM_TYPE(tensor)     qnn::get_qnn_tensor_memtype(tensor)

#define QNN_TENSOR_SET_ID(tensor, value)           qnn::set_qnn_tensor_id(tensor, value)
#define QNN_TENSOR_SET_NAME(tensor, value)         qnn::set_qnn_tensor_name(tensor, value)
#define QNN_TENSOR_SET_TYPE(tensor, value)         qnn::set_qnn_tensor_type(tensor, value)
#define QNN_TENSOR_SET_DATA_FORMAT(tensor, value)  qnn::set_qnn_tensor_dataformat(tensor, value)
#define QNN_TENSOR_SET_DATA_TYPE(tensor, value)    qnn::set_qnn_tensor_datatype(tensor, value)
#define QNN_TENSOR_SET_QUANT_PARAMS(tensor, value) qnn::set_qnn_tensor_quantparams(tensor, value)
#define QNN_TENSOR_SET_RANK(tensor, value)         qnn::set_qnn_tensor_rank(tensor, value)
#define QNN_TENSOR_SET_DIMENSIONS(tensor, value)   qnn::set_qnn_tensor_dimensions(tensor, value)
#define QNN_TENSOR_SET_MEM_TYPE(tensor, value)     qnn::set_qnn_tensor_memtype(tensor, value)
#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value)   qnn::set_qnn_tensor_clientbuf(tensor, value)
#define QNN_TENSOR_SET_MEM_HANDLE(tensor, value)   qnn::set_qnn_tensor_memhandle(tensor, value)
#define VALIDATE_TENSOR_VERSION(tensor, err)       VALIDATE(qnn::validate_tensor_version(tensor), err)
#define QNN_TENSOR_SET_ID(tensor, value)           qnn::set_qnn_tensor_id(tensor, value)
#define QNN_TENSOR_SET_NAME(tensor, value)         qnn::set_qnn_tensor_name(tensor, value)
#define QNN_TENSOR_SET_TYPE(tensor, value)         qnn::set_qnn_tensor_type(tensor, value)
#define QNN_TENSOR_SET_DATA_FORMAT(tensor, value)  qnn::set_qnn_tensor_dataformat(tensor, value)
#define QNN_TENSOR_SET_DATA_TYPE(tensor, value)    qnn::set_qnn_tensor_datatype(tensor, value)
#define QNN_TENSOR_SET_QUANT_PARAMS(tensor, value) qnn::set_qnn_tensor_quantparams(tensor, value)
#define QNN_TENSOR_SET_RANK(tensor, value)         qnn::set_qnn_tensor_rank(tensor, value)
#define QNN_TENSOR_SET_DIMENSIONS(tensor, value)   qnn::set_qnn_tensor_dimensions(tensor, value)
#define QNN_TENSOR_SET_MEM_TYPE(tensor, value)     qnn::set_qnn_tensor_memtype(tensor, value)
#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value)   qnn::set_qnn_tensor_clientbuf(tensor, value)
#define QNN_TENSOR_SET_MEM_HANDLE(tensor, value)   qnn::set_qnn_tensor_memhandle(tensor, value)
#define VALIDATE_TENSOR_VERSION(tensor, err)       VALIDATE(qnn::validate_tensor_version(tensor), err)
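Editor's note: taken together, the GET/SET macros keep call sites independent of the tensor struct version; only the inline helpers in namespace qnn touch tensor.v1. An illustrative sketch of preparing a V1 tensor through them (not from the diff; the function, names, and values are assumptions):

// Hedged sketch: version-checked initialization without touching tensor.v1.
static int example_init_tensor(Qnn_Tensor_t &tensor, uint32_t *dims, uint32_t rank) {
    int err = 0;
    VALIDATE_TENSOR_VERSION(tensor, err); // early-returns nonzero on unsupported versions
    QNN_TENSOR_SET_NAME(tensor, "example_src0");
    QNN_TENSOR_SET_TYPE(tensor, QNN_TENSOR_TYPE_APP_WRITE);
    QNN_TENSOR_SET_DATA_TYPE(tensor, QNN_DATATYPE_FLOAT_32);
    QNN_TENSOR_SET_RANK(tensor, rank);
    QNN_TENSOR_SET_DIMENSIONS(tensor, dims);
    return err;
}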