split logger function, tensors and backend from main qnn source
This commit is contained in:
parent dfe159ffff
commit 99320620b0
9 changed files with 1606 additions and 1486 deletions
1511 ggml-qnn.cpp
File diff suppressed because it is too large
24 ggml-qnn/backend.hpp Normal file
@@ -0,0 +1,24 @@
#pragma once

#include "QnnTypes.h"
#include "QnnCommon.h"
#include "QnnContext.h"
#include "QnnBackend.h"

#include "ggml.h"
#include "ggml-backend.h"

#include "qnn.hpp"

struct ggml_backend_qnn_context {
    int device;
    int threads;
    char name[GGML_MAX_NAME];
    char lib[GGML_MAX_NAME];
    qnn_internal::qnn_instance* instance;
    struct ggml_backend* backend;
    QNN_INTERFACE_VER_TYPE raw_interface;
    QNN_SYSTEM_INTERFACE_VER_TYPE raw_system_interface;
    struct qcom_socinfo socinfo;
};
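The context struct aggregates everything one backend instance needs. A minimal sketch of how a device slot might be populated; the field values below are illustrative, the real device table lives in ggml-qnn.cpp and may differ:

// Hypothetical initialization of one backend slot (values are placeholders).
static ggml_backend_qnn_context g_qnn_npu_ctx = {
    /* .device               = */ 2,              // e.g. QNN_BACKEND_NPU
    /* .threads              = */ 1,
    /* .name                 = */ "qnn-npu",
    /* .lib                  = */ "libQnnHtp.so",
    /* .instance             = */ nullptr,        // created lazily at backend init
    /* .backend              = */ nullptr,
    /* .raw_interface        = */ {},
    /* .raw_system_interface = */ {},
    /* .socinfo              = */ {},
};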
78 ggml-qnn/logger.cpp Normal file
@@ -0,0 +1,78 @@
#include "logger.hpp"

#include <stdio.h>
#include <string.h>
#include <stdarg.h>

#include <mutex>

#if (defined __ANDROID__) || (defined ANDROID)
#include <android/log.h>
#endif

#define QNN_LOGBUF_LEN 4096

void qnn::internal_log(ggml_log_level level, const char* file,
                       const char* func, int line,
                       const char* format, ...) {
    static std::mutex qnn_internal_log_mutex;
    static char s_qnn_internal_log_buf[QNN_LOGBUF_LEN];

    {
        std::lock_guard<std::mutex> lock(qnn_internal_log_mutex);
        va_list args;

        va_start(args, format);
        int len_prefix = snprintf(s_qnn_internal_log_buf, QNN_LOGBUF_LEN,
                                  "[%s, %d]: ", func, line);
        int len = vsnprintf(s_qnn_internal_log_buf + len_prefix,
                            QNN_LOGBUF_LEN - len_prefix, format, args);
        if (len < (QNN_LOGBUF_LEN - len_prefix)) {
#if (defined __ANDROID__) || (defined ANDROID)
            // for Android APK
            __android_log_print(level, "ggml-qnn", "%s\n", s_qnn_internal_log_buf);
#endif
            // for Android command line application or WoA (Windows on ARM)
            printf("%s\n", s_qnn_internal_log_buf);
        }
        va_end(args);
    }
}

void qnn::sdk_logcallback(const char* fmt, QnnLog_Level_t level,
                          uint64_t timestamp, va_list argp) {
#if ENABLE_QNNSDK_LOG
    static std::mutex log_mutex;
    static unsigned char s_ggml_qnn_logbuf[QNN_LOGBUF_LEN];

    const char* log_level_desc = "";
    switch (level) {
        case QNN_LOG_LEVEL_ERROR:
            log_level_desc = "ERROR";
            break;
        case QNN_LOG_LEVEL_WARN:
            log_level_desc = "WARNING";
            break;
        case QNN_LOG_LEVEL_INFO:
            log_level_desc = "INFO";
            break;
        case QNN_LOG_LEVEL_DEBUG:
            log_level_desc = "DEBUG";
            break;
        case QNN_LOG_LEVEL_VERBOSE:
            log_level_desc = "VERBOSE";
            break;
        case QNN_LOG_LEVEL_MAX:
            log_level_desc = "UNKNOWN";
            break;
    }

    double ms = (double)timestamp / 1000000.0;
    {
        std::lock_guard<std::mutex> lock(log_mutex);

        memset(s_ggml_qnn_logbuf, 0, QNN_LOGBUF_LEN);
        vsnprintf(reinterpret_cast<char*>(s_ggml_qnn_logbuf), QNN_LOGBUF_LEN, fmt, argp);
        QNN_LOG_INFO("%8.1fms [%-7s] %s\n", ms, log_level_desc, s_ggml_qnn_logbuf);
    }
#endif
}
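sdk_logcallback matches the QNN SDK's QnnLog_Callback_t signature, so it can be handed to the SDK when the log handle is created. A sketch of the wiring at backend init, assuming `raw_interface` is the resolved QNN_INTERFACE_VER_TYPE and exposes logCreate as in the QNN SDK:

// Sketch: route QNN SDK logs through qnn::sdk_logcallback.
Qnn_LogHandle_t log_handle = nullptr;
auto status = raw_interface.logCreate(qnn::sdk_logcallback,
                                      QNN_LOG_LEVEL_INFO, &log_handle);
if (status != QNN_SUCCESS) {
    QNN_LOG_WARN("failed to create QNN log handle\n");
}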
49 ggml-qnn/logger.hpp Normal file
@@ -0,0 +1,49 @@
#pragma once

#include <stdint.h>
#include <stdarg.h>

#include "QnnTypes.h"
#include "QnnCommon.h"
#include "QnnInterface.h"
#include "System/QnnSystemInterface.h"

#include "ggml.h"

namespace qnn {
    void internal_log(ggml_log_level level, const char* file,
                      const char* func, int line,
                      const char* format, ...);

    void sdk_logcallback(const char* fmt, QnnLog_Level_t level,
                         uint64_t timestamp, va_list argp);
}

// =================================================================================================
//
// QNN backend internal log functions
//
// =================================================================================================
#define QNN_LOG_ERROR(...) \
    qnn::internal_log(GGML_LOG_LEVEL_ERROR, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

#define QNN_LOG_WARN(...) \
    qnn::internal_log(GGML_LOG_LEVEL_WARN, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

#define QNN_LOG_INFO(...) \
    qnn::internal_log(GGML_LOG_LEVEL_INFO, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

#ifdef NDEBUG
#define ENABLE_QNNBACKEND_DEBUG 0  // for troubleshooting QNN backend
#define ENABLE_QNNSDK_LOG 0        // enable/disable QNN SDK's internal log
#else
#define ENABLE_QNNBACKEND_DEBUG 1  // for troubleshooting QNN backend
#define ENABLE_QNNSDK_LOG 1        // enable/disable QNN SDK's internal log
#endif

#if ENABLE_QNNBACKEND_DEBUG
#define QNN_LOG_DEBUG(...) \
    qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
#else
#define QNN_LOG_DEBUG(...)
#endif
46 ggml-qnn/qnn-types.hpp Normal file
@@ -0,0 +1,46 @@
#pragma once

#include <stddef.h>
#include <stdint.h>

namespace qnn {
    // =================================================================================================
    //
    // helper data types / data structures / macros / functions for the
    // Qualcomm QNN (Qualcomm Neural Network, aka Qualcomm AI Engine Direct) SDK
    // ref: https://github.com/pytorch/executorch/tree/main/backends/qualcomm
    // =================================================================================================
    enum sdk_profile_level {
        profile_off = 0,
        profile_basic = 1,
        profile_detail = 2
    };

    enum qcom_htp_arch {
        NONE = 0,
        V68 = 68,
        V69 = 69,
        V73 = 73,
        V75 = 75,
    };

    enum qcom_chipset {
        UNKNOWN_SM = 0,
        SM8450 = 36,  // v69
        SM8475 = 42,  // v69
        SM8550 = 43,  // v73
        SM8650 = 57,  // v75
    };

    using pfn_rpc_mem_init = void (*)(void);
    using pfn_rpc_mem_deinit = void (*)(void);
    using pfn_rpc_mem_alloc = void* (*)(int, uint32_t, int);
    using pfn_rpc_mem_free = void (*)(void*);
    using pfn_rpc_mem_to_fd = int (*)(void*);

    struct qcom_socinfo {
        uint32_t soc_model;
        size_t htp_arch;
        size_t vtcm_size_in_mb;
    };
}

#define QNN_VER_PTR(x) (&((x).v1))  // TODO: remove this macro after we have a separate header for QNN
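The chipset and HTP-arch tables relate via the per-SoC comments above. A sketch of a helper that derives a qcom_socinfo from a detected chipset id; this helper is illustrative and not part of the commit, and the VTCM size is a placeholder (real devices report it via QNN):

// Hypothetical helper, assuming qnn-types.hpp is included.
inline qnn::qcom_socinfo make_socinfo(uint32_t soc_model) {
    size_t arch = qnn::NONE;
    switch (soc_model) {
        case qnn::SM8450:
        case qnn::SM8475: arch = qnn::V69; break;
        case qnn::SM8550: arch = qnn::V73; break;
        case qnn::SM8650: arch = qnn::V75; break;
        default: break;
    }
    return { soc_model, arch, 8 /* placeholder MB */ };
}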
1139 ggml-qnn/qnn.hpp Normal file
File diff suppressed because it is too large
145 ggml-qnn/tensor.hpp Normal file
@@ -0,0 +1,145 @@
#pragma once

#include <string.h>
#include <errno.h>

#include "QnnTensor.h"
#include "System/QnnSystemInterface.h"

#include "backend.hpp"
#include "qnn.hpp"
#include "utils.hpp"

namespace qnn {

template <Qnn_TensorType_t _tensorType> class ggml_qnn_tensor_readwrite {
public:
    ggml_qnn_tensor_readwrite(const ggml_tensor* tensor,
                              Qnn_GraphHandle_t graph_handle,
                              ggml_backend_qnn_context* ctx)
        : _tensor(tensor),
          _qnn_tensor(reinterpret_cast<Qnn_Tensor_t*>(tensor->extra)),
          _context(ctx) {
        _old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions;
        const auto qnn_data_type = datatype_from_ggml_datatype(tensor->type);
        const bool is_npu = ctx->device == QNN_BACKEND_NPU;
        QNN_VER_PTR(*_qnn_tensor)->type = _tensorType;
        if (is_npu) {
            QNN_VER_PTR(*_qnn_tensor)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
            QNN_VER_PTR(*_qnn_tensor)->clientBuf = { .data = nullptr, .dataSize = 0 };
        }

        auto err =
            ctx->raw_interface.tensorCreateGraphTensor(graph_handle, _qnn_tensor);
        if (err != QNN_SUCCESS) {
            QNN_LOG_INFO("error = %d\n", (int)err);
            QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor,
                          QNN_TENSOR_GET_NAME(*_qnn_tensor));
            _context = nullptr;
            return;
        }

        _dimensions[0] = (uint32_t)tensor->ne[0];
        _dimensions[1] = (uint32_t)tensor->ne[1];
        _dimensions[2] = (uint32_t)tensor->ne[2];
        _dimensions[3] = (uint32_t)tensor->ne[3];
        QNN_VER_PTR(*_qnn_tensor)->dimensions = _dimensions;
        QNN_VER_PTR(*_qnn_tensor)->rank = get_ggml_tensor_rank(tensor);
        QNN_VER_PTR(*_qnn_tensor)->dataType = qnn_data_type;

        if (is_npu) {
            qnn_instance* instance = ctx->instance;
            uint8_t* qnn_buffer = static_cast<uint8_t*>(
                instance->alloc_rpcmem(ggml_nbytes(tensor), alignof(void*)));
            if (!qnn_buffer) {
                QNN_LOG_WARN("alloc rpcmem failure, %s\n", strerror(errno));
                QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor,
                              QNN_TENSOR_GET_NAME(*_qnn_tensor));
                _context = nullptr;
                // No free for _qnn_tensor, because it's not registered.
                return;
            } else {
                QNN_LOG_INFO("alloc rpcmem successfully\n");
            }

            instance->register_rpcmem(qnn_buffer, _qnn_tensor);
            if (_tensorType == QNN_TENSOR_TYPE_APP_WRITE ||
                _tensorType == QNN_TENSOR_TYPE_APP_READWRITE) {
                memcpy(qnn_buffer, tensor->data, ggml_nbytes(tensor));
            }
        } else {
            QNN_VER_PTR(*_qnn_tensor)->clientBuf = {
                tensor->data, qnn_get_ggml_tensor_data_size(tensor) };
        }
    }

    ggml_qnn_tensor_readwrite(const ggml_tensor* tensor, Qnn_Tensor_t* qnn_tensor,
                              ggml_backend_qnn_context* ctx)
        : _tensor(tensor), _qnn_tensor(qnn_tensor), _context(ctx) {
        _old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions;
        const auto qnn_data_type = datatype_from_ggml_datatype(tensor->type);
        const bool is_npu = ctx->device == QNN_BACKEND_NPU;

        _dimensions[0] = (uint32_t)tensor->ne[0];
        _dimensions[1] = (uint32_t)tensor->ne[1];
        _dimensions[2] = (uint32_t)tensor->ne[2];
        _dimensions[3] = (uint32_t)tensor->ne[3];
        QNN_VER_PTR(*_qnn_tensor)->dimensions = _dimensions;
        QNN_VER_PTR(*_qnn_tensor)->rank = get_ggml_tensor_rank(tensor);
        QNN_VER_PTR(*_qnn_tensor)->dataType = qnn_data_type;

        if (is_npu) {
            uint8_t* qnn_buffer =
                static_cast<uint8_t*>(ctx->instance->get_rpcmem_from_memhandle(
                    QNN_VER_PTR(*_qnn_tensor)->memHandle));
            if (qnn_buffer) {
                memcpy(qnn_buffer, tensor->data, ggml_nbytes(tensor));
            } else {
                QNN_LOG_WARN("can't find rpcmem from qnn mem handle\n");
                QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor,
                              QNN_TENSOR_GET_NAME(*_qnn_tensor));
                _context = nullptr;
                return;
            }
        } else {
            QNN_VER_PTR(*_qnn_tensor)->clientBuf = {
                tensor->data, qnn_get_ggml_tensor_data_size(tensor) };
        }
    }

    ~ggml_qnn_tensor_readwrite() {
        if ((_tensorType == QNN_TENSOR_TYPE_APP_READWRITE ||
             _tensorType == QNN_TENSOR_TYPE_APP_READ) &&
            _context && _context->device == QNN_BACKEND_NPU) {
            // copy the NPU result back from rpcmem into the ggml tensor
            uint8_t* qnn_buffer =
                static_cast<uint8_t*>(_context->instance->get_rpcmem_from_memhandle(
                    QNN_VER_PTR(*_qnn_tensor)->memHandle));
            memcpy(_tensor->data, qnn_buffer, ggml_nbytes(_tensor));
        }

        QNN_VER_PTR(*_qnn_tensor)->dimensions = _old_dimensions;
    }

    bool is_valid() const { return _context; }
    Qnn_Tensor_t* get_qnn_tensor() const { return _qnn_tensor; }

private:
    const ggml_tensor* _tensor;
    Qnn_Tensor_t* _qnn_tensor;
    ggml_backend_qnn_context* _context;
    uint32_t* _old_dimensions;
    uint32_t _dimensions[4] = {};

    ggml_qnn_tensor_readwrite(const ggml_qnn_tensor_readwrite&) = delete;
    void operator=(const ggml_qnn_tensor_readwrite&) = delete;
    ggml_qnn_tensor_readwrite(ggml_qnn_tensor_readwrite&&) = delete;
    void operator=(ggml_qnn_tensor_readwrite&&) = delete;
};

using ggml_qnn_tensor_output =
    ggml_qnn_tensor_readwrite<QNN_TENSOR_TYPE_APP_READ>;
using ggml_qnn_tensor_input =
    ggml_qnn_tensor_readwrite<QNN_TENSOR_TYPE_APP_WRITE>;

} // namespace qnn
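The wrapper is RAII: input tensors upload their data on construction, output tensors copy results back in the destructor. A sketch of the intended call pattern inside an op handler, assuming src0/src1/dst are the op's ggml tensors and graph_handle/ctx come from the surrounding backend code:

// Sketch: binding ggml tensors to a QNN graph for one binary op.
qnn::ggml_qnn_tensor_input  in0(src0, graph_handle, ctx);
qnn::ggml_qnn_tensor_input  in1(src1, graph_handle, ctx);
qnn::ggml_qnn_tensor_output out(dst, graph_handle, ctx);
if (!in0.is_valid() || !in1.is_valid() || !out.is_valid()) {
    return;  // tensor setup failed; warnings already logged
}
// ... add the op node and execute the graph; when `out` goes out of
//     scope its destructor copies the NPU result back into dst->data.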
99 ggml-qnn/utils.hpp Normal file
@@ -0,0 +1,99 @@
#pragma once

#include <stdint.h>
#include <dlfcn.h>

#include "QnnTypes.h"

#include "ggml.h"

#include "qnn-types.hpp"

namespace qnn {

    // TODO: map more ggml data types to QNN data types
    // ref: explanation of k-quants, https://github.com/ggerganov/llama.cpp/pull/1684
    inline Qnn_DataType_t datatype_from_ggml_datatype(enum ggml_type ggmltype) {
        switch (ggmltype) {
            case GGML_TYPE_F16:
                return QNN_DATATYPE_FLOAT_16;
            case GGML_TYPE_F32:
                return QNN_DATATYPE_FLOAT_32;
            case GGML_TYPE_I8:
                return QNN_DATATYPE_INT_8;
            case GGML_TYPE_Q8_0:
                return QNN_DATATYPE_SFIXED_POINT_8;
            case GGML_TYPE_Q4_0:
                return QNN_DATATYPE_SFIXED_POINT_4;
            default:
                break;
        }
        return QNN_DATATYPE_UNDEFINED;
    }

    inline uint32_t get_ggml_tensor_rank(const ggml_tensor* tensor) {
        uint32_t rank = 0;
        for (int i = 0; i < GGML_MAX_DIMS; i++) {
            if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) {
                rank++;
            }
        }
        return rank;
    }

    inline const char* get_backend_name(int n_backend_type) {
        switch (n_backend_type) {
            case QNN_BACKEND_CPU:
                return "QNN-CPU";
            case QNN_BACKEND_GPU:
                return "QNN-GPU";
            case QNN_BACKEND_NPU:
                return "QNN-NPU";
            case QNN_BACKEND_GGML:
                return "ggml"; // "fake" QNN backend, used to compare performance between the QNN backend and the original GGML
            default:
                return "unknown";
        }
    }

    inline const char* get_chipset_desc(uint32_t chipset_id) {
        switch (chipset_id) {
            case SM8450:
                return "SM8450";
            case SM8475:
                return "SM8475";
            case SM8550:
                return "SM8550";
            case SM8650:
                return "SM8650";
            default:
                return "unknown";
        }
    }

    inline const char* get_htparch_desc(size_t htp_arch) {
        switch (htp_arch) {
            case V68:
                return "QCOM_HTP_V68";
            case V69:
                return "QCOM_HTP_V69";
            case V73:
                return "QCOM_HTP_V73";
            case V75:
                return "QCOM_HTP_V75";
            default:
                return "unknown";
        }
    }

    template <typename Fn> Fn load_qnn_functionpointers(void* handle, const char* function_name) {
        return reinterpret_cast<Fn>(dlsym(handle, function_name));
    }

    inline intptr_t align_to(size_t alignment, intptr_t offset) {
        return offset % alignment == 0
                   ? offset
                   : offset + (static_cast<intptr_t>(alignment) -
                               offset % static_cast<intptr_t>(alignment));
    }

} // namespace qnn
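A sketch of the loader helper in use, resolving the QNN provider entry point from a dlopen'ed backend library; the typedef mirrors the QNN SDK's QnnInterface_getProviders signature and is introduced here for illustration:

// Sketch: resolve QnnInterface_getProviders from a backend library.
typedef Qnn_ErrorHandle_t (*pfn_qnn_get_providers)(
    const QnnInterface_t*** providers, uint32_t* num_providers);

void* lib = dlopen("libQnnCpu.so", RTLD_NOW | RTLD_LOCAL);
auto get_providers = lib
    ? qnn::load_qnn_functionpointers<pfn_qnn_get_providers>(
          lib, "QnnInterface_getProviders")
    : nullptr;

// align_to rounds an offset up to the next multiple of `alignment`:
// align_to(32, 100) == 128, align_to(32, 96) == 96.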
@@ -20,6 +20,7 @@ set(SOURCE_FILES
    ../../ggml-alloc.c
    ../../ggml-backend.c
    ../../ggml-quants.c
    ../../ggml-qnn/logger.cpp
    ../../ggml-qnn.cpp
    ggml-qnn-ut.cpp
)