add unary op template and more ops

parent 7cbc4fbd8c
commit 100ccd5e7f

4 changed files with 225 additions and 74 deletions
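This commit splits the backend's single op table into separate unary and binary function-pointer tables, so single-input ops such as GGML_OP_SQRT can be dispatched without a dummy second operand. A minimal sketch of the resulting dispatch flow (it mirrors ggml_qnn_compute_forward in the diff below; the wrapper name `forward` is hypothetical):

    // Look the op up in the unary table first, then fall back to the binary table.
    bool forward(ggml_backend_qnn_context *ctx, ggml_tensor *tensor) {
        if (auto unary_op = qnn::ggml_qnn_unary_op_array()[tensor->op]) {
            return unary_op(ctx, tensor->src[0], tensor); // e.g. GGML_OP_SQRT
        }
        if (auto binary_op = qnn::ggml_qnn_binary_op_array()[tensor->op]) {
            // e.g. GGML_OP_ADD / SUB / MUL / DIV / MUL_MAT
            return binary_op(ctx, tensor->src[0], tensor->src[1], tensor);
        }
        return false; // op not handled by the QNN backend
    }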
@@ -1,6 +1,5 @@
 #include "ggml-qnn.h"
 
-#include <list>
 #include <stdatomic.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -15,6 +14,7 @@
 #include <fstream>
 #include <functional>
 #include <iostream>
+#include <list>
 #include <memory>
 #include <mutex>
 #include <queue>
@@ -142,7 +142,8 @@ struct ggml_backend_qnn_buffer_type_context {
 // =================================================================================================
 static bool ggml_qnn_can_handle_op(ggml_backend_qnn_context *ctx, const struct ggml_tensor *tensor,
                                    bool b_dump_tensor_info) {
-    if (ggml_is_empty(tensor) || !qnn::ggml_qnn_op_array()[tensor->op]) {
+    if (ggml_is_empty(tensor) ||
+        (!qnn::ggml_qnn_unary_op_array()[tensor->op] && !qnn::ggml_qnn_binary_op_array()[tensor->op])) {
         return false;
     }
 
@@ -161,19 +162,6 @@ static bool ggml_qnn_can_handle_op(ggml_backend_qnn_context *ctx, const struct g
         return false;
     }
 
-    // TODO: support other GGML OPs using QNN API
-    // a GENERAL approach could fix this problem in a standalone PR of refine ggml backend
-    // subsystem for hybrid inference between CPU&GPU / CPU&NPU easily(less the 100 LoC and no
-    // side-effect to the existing codes) for ANY ggml backends which the backend's
-    // ggml_backend_xxx_buffer_is_host return true. this approach could be found at:
-    // https://github.com/ggerganov/llama.cpp/pull/7641
-    bool supported_op = false;
-    supported_op = (tensor->op == GGML_OP_ADD);
-    supported_op = ((tensor->op == GGML_OP_ADD) || (tensor->op == GGML_OP_MUL_MAT));
-    if (!supported_op) {
-        return false;
-    }
-
     // TODO: support other quantized data type
     if (ggml_is_quantized(src0->type)) {
         if (src0->type != GGML_TYPE_Q8_0 && src0->type != GGML_TYPE_Q4_0) {
@@ -192,14 +180,18 @@ static bool ggml_qnn_can_handle_op(ggml_backend_qnn_context *ctx, const struct g
 }
 
 bool ggml_qnn_compute_forward(ggml_backend_qnn_context *ctx, struct ggml_tensor *tensor) {
-    auto func = qnn::ggml_qnn_op_array()[tensor->op];
-    if (!func) {
-        QNN_LOG_WARN("unsupported op %d", tensor->op);
-        return false;
+    auto unary_op = qnn::ggml_qnn_unary_op_array()[tensor->op];
+    if (unary_op) {
+        return unary_op(ctx, tensor->src[0], tensor);
     }
 
-    func(ctx, tensor->src[0], tensor->src[1], tensor);
-    return true;
+    auto binary_op = qnn::ggml_qnn_binary_op_array()[tensor->op];
+    if (binary_op) {
+        return binary_op(ctx, tensor->src[0], tensor->src[1], tensor);
+    }
+
+    QNN_LOG_WARN("unsupported op %d", tensor->op);
+    return false;
 }
 
 static const char *ggml_backend_qnn_buffer_get_name(ggml_backend_buffer_t buffer) {
@@ -343,6 +335,7 @@ GGML_CALL static void ggml_backend_qnn_free(ggml_backend_t backend) {
 
     auto instance = g_qnn_mgr[ctx->device].instance;
     if (instance) {
+        ctx->qnn_unary_graph_cache.clear();
         for (const auto &graph_item : ctx->qnn_binary_graph_cache) {
            QNN_LOG_INFO("graph type:%s", graph_item.first.c_str());
         }

@@ -12,6 +12,23 @@
 
 namespace {
 
+bool qnn_is_valid_params(ggml_backend_qnn_context *ctx, const ggml_tensor *src, ggml_tensor *dst) {
+    if (!ctx || !src || !dst) {
+        QNN_LOG_WARN("invalid params\n");
+        return false;
+    }
+
+    auto instance = ctx->instance;
+    auto *tensor0 = qnn::ggml_qnn_tensor::from_ggml_tensor(src);
+    auto *tensor1 = qnn::ggml_qnn_tensor::from_ggml_tensor(dst);
+    if (!instance || !tensor0 || !tensor1) {
+        QNN_LOG_WARN("invalid tensors\n");
+        return false;
+    }
+
+    return true;
+}
+
 bool qnn_is_valid_params(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
                          ggml_tensor *dst) {
     if (!ctx || !src0 || !src1 || !dst) {
@@ -33,15 +50,13 @@ bool qnn_is_valid_params(ggml_backend_qnn_context *ctx, const ggml_tensor *src0,
 
 } // namespace
 
-#define CHECK_PARAMS(ctx, src0, src1, dst) \
-    do { \
-        if (!qnn_is_valid_params((ctx), (src0), (src1), (dst))) { \
-            return; \
-        } \
-    } while (0)
+#define CHECK_PARAMS(ctx, ...) \
+    if (!qnn_is_valid_params((ctx), __VA_ARGS__)) { \
+        return false; \
+    }
 
 #else
-#define CHECK_PARAMS(ctx, src0, src1, dst)
+#define CHECK_PARAMS(ctx, ...)
 #endif
 
 namespace {
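With the variadic rewrite, one CHECK_PARAMS macro serves both operand counts and fits the new bool-returning op signatures; the argument count selects the matching qnn_is_valid_params overload. A hedged usage sketch (the op function names here are hypothetical):

    bool my_unary_op(ggml_backend_qnn_context *ctx, const ggml_tensor *src, ggml_tensor *dst) {
        CHECK_PARAMS(ctx, src, dst);        // expands to the 2-tensor overload
        // ... actual work ...
        return true;
    }

    bool my_binary_op(ggml_backend_qnn_context *ctx, const ggml_tensor *src0,
                      const ggml_tensor *src1, ggml_tensor *dst) {
        CHECK_PARAMS(ctx, src0, src1, dst); // expands to the 3-tensor overload
        // ... actual work ...
        return true;
    }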
@@ -125,15 +140,33 @@ bool execute_graph(qnn::ggml_qnn_graph<_InputSize, _OutputSize> *graph,
     return true;
 }
 
+qnn::ggml_qnn_unary_graph_cache_t &get_qnn_graph_cache(ggml_backend_qnn_context *ctx,
+                                                       const std::array<const ggml_tensor *, 1> &inputs,
+                                                       const std::array<ggml_tensor *, 1> &outputs) {
+    GGML_UNUSED(inputs);
+    GGML_UNUSED(outputs);
+    return ctx->qnn_unary_graph_cache;
+}
+
+qnn::ggml_qnn_binary_graph_cache_t &get_qnn_graph_cache(ggml_backend_qnn_context *ctx,
+                                                        const std::array<const ggml_tensor *, 2> &inputs,
+                                                        const std::array<ggml_tensor *, 1> &outputs) {
+    GGML_UNUSED(inputs);
+    GGML_UNUSED(outputs);
+    return ctx->qnn_binary_graph_cache;
+}
+
 template <size_t _InputSize, size_t _OutputSize>
-qnn::ggml_qnn_graph_binary *get_qnn_graph_from_cache(ggml_backend_qnn_context *ctx, ggml_op op,
-                                                     const std::string &qnn_op,
-                                                     const std::array<const ggml_tensor *, _InputSize> &inputs,
-                                                     const std::array<ggml_tensor *, _OutputSize> &outputs) {
+qnn::ggml_qnn_graph<_InputSize, _OutputSize> *get_qnn_graph_from_cache(
+    ggml_backend_qnn_context *ctx, ggml_op op, const std::string &qnn_op,
+    const std::array<const ggml_tensor *, _InputSize> &inputs, const std::array<ggml_tensor *, _OutputSize> &outputs) {
+    using graph_t = qnn::ggml_qnn_graph<_InputSize, _OutputSize>;
+
+    auto &graph_cache = get_qnn_graph_cache(ctx, inputs, outputs);
     const std::string graph_key(ggml_op_name(op));
-    auto it = ctx->qnn_binary_graph_cache.find(graph_key);
-    qnn::ggml_qnn_graph_binary *graph_ptr = nullptr;
-    if (it != ctx->qnn_binary_graph_cache.end()) {
+    auto it = graph_cache.find(graph_key);
+    graph_t *graph_ptr = nullptr;
+    if (it != graph_cache.end()) {
         graph_ptr = it->second.get();
     } else {
         std::string graph_name = graph_key + "_" + std::to_string(ctx->threads);
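The two get_qnn_graph_cache overloads let the templated get_qnn_graph_from_cache pick the unary or binary cache purely from the arity of its std::array arguments, at compile time and with no tag parameter. A self-contained sketch of the same trick, with toy types and hypothetical names:

    #include <array>
    #include <cstdio>

    // Overloads distinguished only by the array's compile-time size.
    void get_cache(const std::array<int, 1> &) { std::puts("unary graph cache"); }
    void get_cache(const std::array<int, 2> &) { std::puts("binary graph cache"); }

    int main() {
        get_cache(std::array<int, 1>{}); // one input  -> unary overload
        get_cache(std::array<int, 2>{}); // two inputs -> binary overload
        return 0;
    }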
@@ -141,49 +174,49 @@ qnn::ggml_qnn_graph_binary *get_qnn_graph_from_cache(ggml_backend_qnn_context *c
             graph_name += "_";
             graph_name += input->name;
         }
-        auto graph = std::make_unique<qnn::ggml_qnn_graph_binary>(graph_name, (QNNBackend)(ctx->device),
-                                                                  ctx->instance->get_qnn_context_handle(),
-                                                                  ctx->raw_interface, ctx->socinfo.vtcm_size_in_mb);
+        auto graph =
+            std::make_unique<graph_t>(graph_name, (QNNBackend)(ctx->device), ctx->instance->get_qnn_context_handle(),
+                                      ctx->raw_interface, ctx->socinfo.vtcm_size_in_mb);
 
         if (!graph->is_valid()) {
             return nullptr;
         }
 
-        if (!qnn_bind_tensors_to_graph<2, 1>(graph.get(), qnn_op.c_str(), inputs, outputs)) {
+        if (!qnn_bind_tensors_to_graph<_InputSize, _OutputSize>(graph.get(), qnn_op.c_str(), inputs, outputs)) {
             return nullptr;
         }
 
         graph_ptr = graph.get();
-        ctx->qnn_binary_graph_cache[graph_key] = std::move(graph);
+        graph_cache[graph_key] = std::move(graph);
     }
 
     return graph_ptr;
 }
 
 constexpr const char *kGgmlOpToQnnOp[] = {
-    nullptr,                      // GGML_OP_NONE
-    nullptr,                      // GGML_OP_DUP
-    QNN_OP_ELEMENT_WISE_ADD,      // GGML_OP_ADD
-    nullptr,                      // GGML_OP_ADD1
-    nullptr,                      // GGML_OP_ACC
-    nullptr,                      // GGML_OP_SUB
-    QNN_OP_ELEMENT_WISE_MULTIPLY, // GGML_OP_MUL
-    nullptr,                      // GGML_OP_DIV
-    nullptr,                      // GGML_OP_SQR
-    nullptr,                      // GGML_OP_SQRT
-    nullptr,                      // GGML_OP_LOG
-    nullptr,                      // GGML_OP_SUM
-    nullptr,                      // GGML_OP_SUM_ROWS
-    nullptr,                      // GGML_OP_MEAN
-    nullptr,                      // GGML_OP_ARGMAX
-    nullptr,                      // GGML_OP_REPEAT
-    nullptr,                      // GGML_OP_REPEAT_BACK
-    nullptr,                      // GGML_OP_CONCAT
-    nullptr,                      // GGML_OP_SILU_BACK
-    nullptr,                      // GGML_OP_NORM
-    nullptr,                      // GGML_OP_RMS_NORM
-    nullptr,                      // GGML_OP_RMS_NORM_BACK
-    nullptr,                      // GGML_OP_GROUP_NORM
+    nullptr,                         // GGML_OP_NONE
+    nullptr,                         // GGML_OP_DUP
+    QNN_OP_ELEMENT_WISE_ADD,         // GGML_OP_ADD
+    nullptr,                         // GGML_OP_ADD1
+    nullptr,                         // GGML_OP_ACC
+    QNN_OP_ELEMENT_WISE_SUBTRACT,    // GGML_OP_SUB
+    QNN_OP_ELEMENT_WISE_MULTIPLY,    // GGML_OP_MUL
+    QNN_OP_ELEMENT_WISE_DIVIDE,      // GGML_OP_DIV
+    nullptr,                         // GGML_OP_SQR
+    QNN_OP_ELEMENT_WISE_SQUARE_ROOT, // GGML_OP_SQRT
+    nullptr,                         // GGML_OP_LOG
+    nullptr,                         // GGML_OP_SUM
+    nullptr,                         // GGML_OP_SUM_ROWS
+    nullptr,                         // GGML_OP_MEAN
+    nullptr,                         // GGML_OP_ARGMAX
+    nullptr,                         // GGML_OP_REPEAT
+    nullptr,                         // GGML_OP_REPEAT_BACK
+    nullptr,                         // GGML_OP_CONCAT
+    nullptr,                         // GGML_OP_SILU_BACK
+    nullptr,                         // GGML_OP_NORM
+    nullptr,                         // GGML_OP_RMS_NORM
+    nullptr,                         // GGML_OP_RMS_NORM_BACK
+    nullptr,                         // GGML_OP_GROUP_NORM
 
     QNN_OP_MAT_MUL, // GGML_OP_MUL_MAT
     nullptr,        // GGML_OP_MUL_MAT_ID
@@ -249,7 +282,7 @@ static_assert(sizeof(kGgmlOpToQnnOp) / sizeof(kGgmlOpToQnnOp[0]) == GGML_OP_COUN
               "GGML_OP_COUNT does not match the size of the ops table");
 
 template <ggml_op _GgmlOp>
-void qnn_binary_op_impl(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+bool qnn_binary_op_impl(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
                         ggml_tensor *dst) {
     static_assert(kGgmlOpToQnnOp[_GgmlOp] != nullptr, "GGML_OP does not have a corresponding QNN_OP");
 
@@ -270,20 +303,136 @@ void qnn_binary_op_impl(ggml_backend_qnn_context *ctx, const ggml_tensor *src0,
         print_ggml_tensor(src1);
         print_ggml_tensor(dst);
     }
+
+    return succeed;
 }
 
+template <ggml_op _GgmlOp>
+bool qnn_unary_op_impl(ggml_backend_qnn_context *ctx, const ggml_tensor *src, ggml_tensor *dst) {
+    static_assert(kGgmlOpToQnnOp[_GgmlOp] != nullptr, "GGML_OP does not have a corresponding QNN_OP");
+
+    CHECK_PARAMS(ctx, src, dst);
+
+    qnn::qnn_perf perf(ggml_op_name(_GgmlOp));
+    perf.start();
+
+    bool succeed = false;
+    auto *graph_ptr = get_qnn_graph_from_cache<1, 1>(ctx, _GgmlOp, kGgmlOpToQnnOp[_GgmlOp], { src }, { dst });
+    if (graph_ptr) {
+        succeed = execute_graph<1, 1>(graph_ptr, { src }, { dst });
+    }
+
+    if (!succeed) {
+        print_ggml_tensor(src);
+        print_ggml_tensor(dst);
+    }
+
+    return succeed;
+}
+
 } // namespace
 
-qnn::ggml_qnn_op_array_t qnn::ggml_qnn_op_array() {
-    static constexpr const qnn::ggml_qnn_op_t kQnnOpsTable[] = {
+qnn::ggml_qnn_unary_op_array_t qnn::ggml_qnn_unary_op_array() {
+    static constexpr const qnn::ggml_qnn_unary_op_t kQnnOpsTable[] = {
+        nullptr,                         // GGML_OP_NONE
+        nullptr,                         // GGML_OP_DUP
+        nullptr,                         // GGML_OP_ADD
+        nullptr,                         // GGML_OP_ADD1
+        nullptr,                         // GGML_OP_ACC
+        nullptr,                         // GGML_OP_SUB
+        nullptr,                         // GGML_OP_MUL
+        nullptr,                         // GGML_OP_DIV
+        nullptr,                         // GGML_OP_SQR
+        qnn_unary_op_impl<GGML_OP_SQRT>, // GGML_OP_SQRT
+        nullptr,                         // GGML_OP_LOG
+        nullptr,                         // GGML_OP_SUM
+        nullptr,                         // GGML_OP_SUM_ROWS
+        nullptr,                         // GGML_OP_MEAN
+        nullptr,                         // GGML_OP_ARGMAX
+        nullptr,                         // GGML_OP_REPEAT
+        nullptr,                         // GGML_OP_REPEAT_BACK
+        nullptr,                         // GGML_OP_CONCAT
+        nullptr,                         // GGML_OP_SILU_BACK
+        nullptr,                         // GGML_OP_NORM
+        nullptr,                         // GGML_OP_RMS_NORM
+        nullptr,                         // GGML_OP_RMS_NORM_BACK
+        nullptr,                         // GGML_OP_GROUP_NORM
+
+        nullptr,                         // GGML_OP_MUL_MAT
+        nullptr,                         // GGML_OP_MUL_MAT_ID
+        nullptr,                         // GGML_OP_OUT_PROD
+
+        nullptr,                         // GGML_OP_SCALE
+        nullptr,                         // GGML_OP_SET
+        nullptr,                         // GGML_OP_CPY
+        nullptr,                         // GGML_OP_CONT
+        nullptr,                         // GGML_OP_RESHAPE
+        nullptr,                         // GGML_OP_VIEW
+        nullptr,                         // GGML_OP_PERMUTE
+        nullptr,                         // GGML_OP_TRANSPOSE
+        nullptr,                         // GGML_OP_GET_ROWS
+        nullptr,                         // GGML_OP_GET_ROWS_BACK
+        nullptr,                         // GGML_OP_DIAG
+        nullptr,                         // GGML_OP_DIAG_MASK_INF
+        nullptr,                         // GGML_OP_DIAG_MASK_ZERO
+        nullptr,                         // GGML_OP_SOFT_MAX
+        nullptr,                         // GGML_OP_SOFT_MAX_BACK
+        nullptr,                         // GGML_OP_ROPE
+        nullptr,                         // GGML_OP_ROPE_BACK
+        nullptr,                         // GGML_OP_CLAMP
+        nullptr,                         // GGML_OP_CONV_TRANSPOSE_1D
+        nullptr,                         // GGML_OP_IM2COL
+        nullptr,                         // GGML_OP_CONV_TRANSPOSE_2D
+        nullptr,                         // GGML_OP_POOL_1D
+        nullptr,                         // GGML_OP_POOL_2D
+        nullptr,                         // GGML_OP_UPSCALE
+        nullptr,                         // GGML_OP_PAD
+        nullptr,                         // GGML_OP_ARANGE
+        nullptr,                         // GGML_OP_TIMESTEP_EMBEDDING
+        nullptr,                         // GGML_OP_ARGSORT
+        nullptr,                         // GGML_OP_LEAKY_RELU
+
+        nullptr,                         // GGML_OP_FLASH_ATTN_EXT
+        nullptr,                         // GGML_OP_FLASH_ATTN_BACK
+        nullptr,                         // GGML_OP_SSM_CONV
+        nullptr,                         // GGML_OP_SSM_SCAN
+        nullptr,                         // GGML_OP_WIN_PART
+        nullptr,                         // GGML_OP_WIN_UNPART
+        nullptr,                         // GGML_OP_GET_REL_POS
+        nullptr,                         // GGML_OP_ADD_REL_POS
+
+        nullptr,                         // GGML_OP_UNARY
+
+        nullptr,                         // GGML_OP_MAP_UNARY
+        nullptr,                         // GGML_OP_MAP_BINARY
+
+        nullptr,                         // GGML_OP_MAP_CUSTOM1_F32
+        nullptr,                         // GGML_OP_MAP_CUSTOM2_F32
+        nullptr,                         // GGML_OP_MAP_CUSTOM3_F32
+
+        nullptr,                         // GGML_OP_MAP_CUSTOM1
+        nullptr,                         // GGML_OP_MAP_CUSTOM2
+        nullptr,                         // GGML_OP_MAP_CUSTOM3
+
+        nullptr,                         // GGML_OP_CROSS_ENTROPY_LOSS
+        nullptr,                         // GGML_OP_CROSS_ENTROPY_LOSS_BACK
+    };
+
+    static_assert(sizeof(kQnnOpsTable) / sizeof(kQnnOpsTable[0]) == GGML_OP_COUNT,
+                  "GGML_OP_COUNT does not match the size of the ops table");
+    return kQnnOpsTable;
+}
+
+qnn::ggml_qnn_binary_op_array_t qnn::ggml_qnn_binary_op_array() {
+    static constexpr const qnn::ggml_qnn_binary_op_t kQnnOpsTable[] = {
         nullptr,                         // GGML_OP_NONE
         nullptr,                         // GGML_OP_DUP
         qnn_binary_op_impl<GGML_OP_ADD>, // GGML_OP_ADD
         nullptr,                         // GGML_OP_ADD1
         nullptr,                         // GGML_OP_ACC
-        nullptr,                         // GGML_OP_SUB
+        qnn_binary_op_impl<GGML_OP_SUB>, // GGML_OP_SUB
         qnn_binary_op_impl<GGML_OP_MUL>, // GGML_OP_MUL
-        nullptr,                         // GGML_OP_DIV
+        qnn_binary_op_impl<GGML_OP_DIV>, // GGML_OP_DIV
         nullptr,                         // GGML_OP_SQR
         nullptr,                         // GGML_OP_SQRT
         nullptr,                         // GGML_OP_LOG

@@ -6,11 +6,14 @@
 
 namespace qnn {
 
-typedef void (*ggml_qnn_op_t)(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
-                              ggml_tensor *dst);
+typedef bool (*ggml_qnn_unary_op_t)(ggml_backend_qnn_context *ctx, const ggml_tensor *src, ggml_tensor *dst);
+typedef bool (*ggml_qnn_binary_op_t)(ggml_backend_qnn_context *ctx, const ggml_tensor *src0, const ggml_tensor *src1,
+                                     ggml_tensor *dst);
 
-typedef const ggml_qnn_op_t (&ggml_qnn_op_array_t)[GGML_OP_COUNT];
+typedef const ggml_qnn_unary_op_t (&ggml_qnn_unary_op_array_t)[GGML_OP_COUNT];
+typedef const ggml_qnn_binary_op_t (&ggml_qnn_binary_op_array_t)[GGML_OP_COUNT];
 
-ggml_qnn_op_array_t ggml_qnn_op_array();
+ggml_qnn_unary_op_array_t ggml_qnn_unary_op_array();
+ggml_qnn_binary_op_array_t ggml_qnn_binary_op_array();
 
 } // namespace qnn

@@ -11,6 +11,11 @@
 #include "graph.hpp"
 #include "qnn.hpp"
 
+namespace qnn {
+typedef std::unordered_map<std::string, std::unique_ptr<qnn::ggml_qnn_graph_unary>> ggml_qnn_unary_graph_cache_t;
+typedef std::unordered_map<std::string, std::unique_ptr<qnn::ggml_qnn_graph_binary>> ggml_qnn_binary_graph_cache_t;
+} // namespace qnn
+
 struct ggml_backend_qnn_context {
     int device;
     int threads;
@@ -21,5 +26,6 @@ struct ggml_backend_qnn_context {
     QNN_INTERFACE_VER_TYPE raw_interface;
     QNN_SYSTEM_INTERFACE_VER_TYPE raw_system_interface;
     qnn::qcom_socinfo socinfo;
-    std::unordered_map<std::string, std::unique_ptr<qnn::ggml_qnn_graph_binary>> qnn_binary_graph_cache;
+    qnn::ggml_qnn_unary_graph_cache_t qnn_unary_graph_cache;
+    qnn::ggml_qnn_binary_graph_cache_t qnn_binary_graph_cache;
 };