split logger function, tensors and backend from main qnn source

This commit is contained in:
hongruichen 2024-06-19 12:25:32 +08:00
parent dfe159ffff
commit 99320620b0
9 changed files with 1606 additions and 1486 deletions

File diff suppressed because it is too large Load diff

24
ggml-qnn/backend.hpp Normal file
View file

@ -0,0 +1,24 @@
#pragma once
#include "QnnTypes.h"
#include "QnnCommon.h"
#include "QnnContext.h"
#include "QnnBackend.h"
#include "ggml.h"
#include "ggml-backend.h"
#include "qnn.hpp"
// Runtime state for one instantiated QNN backend device; shared by the
// tensor wrappers (tensor.hpp) and the main ggml-qnn source.
// NOTE(review): `instance` is qualified with `qnn_internal::` here while
// tensor.hpp refers to `qnn_instance` unqualified — confirm the intended
// namespace after the file split.
struct ggml_backend_qnn_context {
    int device;                                         // backend device id (e.g. QNN_BACKEND_NPU, see tensor.hpp)
    int threads;                                        // host thread count — presumably mirrors ggml n_threads; confirm
    char name[GGML_MAX_NAME];                           // human-readable backend name
    char lib[GGML_MAX_NAME];                            // QNN backend library name/path — TODO confirm which
    qnn_internal::qnn_instance* instance;               // owning QNN instance (rpcmem registration, mem handles)
    struct ggml_backend* backend;                       // the ggml backend object wrapping this context
    QNN_INTERFACE_VER_TYPE raw_interface;               // versioned QNN API function table
    QNN_SYSTEM_INTERFACE_VER_TYPE raw_system_interface; // versioned QNN system API function table
    struct qcom_socinfo socinfo;                        // detected SoC model / HTP arch / VTCM size
};

78
ggml-qnn/logger.cpp Normal file
View file

@ -0,0 +1,78 @@
#include "logger.hpp"

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#include <mutex>

#if (defined __ANDROID__) || (defined ANDROID)
#include <android/log.h>
#endif
#define QNN_LOGBUF_LEN 4096

// Formats "[func, line]: <message>" into a shared static buffer and emits it
// to the Android log (when built for Android) and to stdout. Thread-safe via
// an internal mutex serializing use of the shared buffer.
// `file` is accepted so the QNN_LOG_* macros can pass __FILE__, but it is not
// currently printed. NOTE(review): `level` is forwarded directly to
// __android_log_print, which expects an android_LogPriority, not a
// ggml_log_level — confirm the intended mapping.
void qnn::internal_log(ggml_log_level level, const char* file,
                       const char* func, int line,
                       const char* format, ...) {
    static std::mutex qnn_internal_log_mutex;
    static char s_qnn_internal_log_buf[QNN_LOGBUF_LEN];

    (void)file; // call sites pass __FILE__, but only func/line are printed

    {
        std::lock_guard<std::mutex> lock(qnn_internal_log_mutex);
        va_list args;
        va_start(args, format);
        int len_prefix = snprintf(s_qnn_internal_log_buf, QNN_LOGBUF_LEN,
                                  "[%s, %d]: ", func, line);
        // snprintf returns the would-be length (or a negative value on
        // error); clamp it so the pointer arithmetic below stays in bounds.
        if (len_prefix < 0) {
            len_prefix = 0;
        } else if (len_prefix >= QNN_LOGBUF_LEN) {
            len_prefix = QNN_LOGBUF_LEN - 1;
        }
        // vsnprintf always null-terminates; emit the (possibly truncated)
        // message rather than silently dropping over-long lines.
        vsnprintf(s_qnn_internal_log_buf + len_prefix,
                  QNN_LOGBUF_LEN - len_prefix, format, args);
        va_end(args);
#if (defined __ANDROID__) || (defined ANDROID)
        // for Android APK
        __android_log_print(level, "ggml-qnn", "%s\n", s_qnn_internal_log_buf);
#endif
        // for Android command line application or WoA(Windows on ARM)
        printf("%s\n", s_qnn_internal_log_buf);
    }
}
// QnnLog callback registered with the QNN SDK. Renders the SDK's message into
// a shared static buffer and forwards it through QNN_LOG_INFO, tagged with
// the SDK log level and a timestamp.
// Thread-safe via an internal mutex; compiled to a no-op when
// ENABLE_QNNSDK_LOG is 0.
void qnn::sdk_logcallback(const char* fmt, QnnLog_Level_t level,
                          uint64_t timestamp, va_list argp) {
#if ENABLE_QNNSDK_LOG
    static std::mutex log_mutex;
    // Plain char buffer: usable with vsnprintf/%s without casts.
    static char s_ggml_qnn_logbuf[QNN_LOGBUF_LEN];

    const char* log_level_desc = "";
    switch (level) {
    case QNN_LOG_LEVEL_ERROR:
        log_level_desc = "ERROR";
        break;
    case QNN_LOG_LEVEL_WARN:
        log_level_desc = "WARNING";
        break;
    case QNN_LOG_LEVEL_INFO:
        log_level_desc = "INFO";
        break;
    case QNN_LOG_LEVEL_DEBUG:
        log_level_desc = "DEBUG";
        break;
    case QNN_LOG_LEVEL_VERBOSE:
        log_level_desc = "VERBOSE";
        break;
    default:
        // QNN_LOG_LEVEL_MAX or any future/unknown SDK level.
        log_level_desc = "UNKNOWN";
        break;
    }

    // NOTE(review): assumes the SDK timestamp unit makes `timestamp / 1e6`
    // milliseconds — confirm against the QnnLog documentation.
    double ms = (double)timestamp / 1000000.0;

    {
        std::lock_guard<std::mutex> lock(log_mutex);
        // vsnprintf null-terminates, so no pre-clearing of the buffer is
        // needed.
        vsnprintf(s_ggml_qnn_logbuf, QNN_LOGBUF_LEN, fmt, argp);
        QNN_LOG_INFO("%8.1fms [%-7s] %s\n", ms, log_level_desc, s_ggml_qnn_logbuf);
    }
#else
    (void)fmt;
    (void)level;
    (void)timestamp;
    (void)argp;
#endif
}

49
ggml-qnn/logger.hpp Normal file
View file

@ -0,0 +1,49 @@
#pragma once
#include <stdarg.h>
#include <stdint.h>

#include "QnnTypes.h"
#include "QnnCommon.h"
#include "QnnInterface.h"
#include "System/QnnSystemInterface.h"

#include "ggml.h"
namespace qnn {
    // printf-style logging helper used by the QNN_LOG_* macros below;
    // file/func/line identify the call site (passed as __FILE__/__FUNCTION__/
    // __LINE__ by the macros).
    void internal_log(ggml_log_level level, const char* file,
        const char* func, int line,
        const char* format, ...);

    // Callback matching the QNN SDK's QnnLog_Callback_t shape; forwards SDK
    // log messages into the ggml-qnn logger.
    void sdk_logcallback(const char* fmt, QnnLog_Level_t level,
        uint64_t timestamp, va_list argp);
}
// =================================================================================================
//
// QNN backend internal log function
//
// =================================================================================================
// Each macro forwards the call site (__FILE__/__FUNCTION__/__LINE__) and the
// ggml log level matching its name. (Previously all three passed
// GGML_LOG_LEVEL_DEBUG, so errors and warnings were logged at debug level.)
#define QNN_LOG_ERROR(...) \
    qnn::internal_log(GGML_LOG_LEVEL_ERROR, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

#define QNN_LOG_WARN(...) \
    qnn::internal_log(GGML_LOG_LEVEL_WARN, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

#define QNN_LOG_INFO(...) \
    qnn::internal_log(GGML_LOG_LEVEL_INFO, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

#ifdef NDEBUG
#define ENABLE_QNNBACKEND_DEBUG 0 // for troubleshooting QNN backend
#define ENABLE_QNNSDK_LOG 0       // enable/disable QNN SDK's internal log
#else
#define ENABLE_QNNBACKEND_DEBUG 1 // for troubleshooting QNN backend
#define ENABLE_QNNSDK_LOG 1       // enable/disable QNN SDK's internal log
#endif

#if ENABLE_QNNBACKEND_DEBUG
#define QNN_LOG_DEBUG(...) \
    qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
#else
#define QNN_LOG_DEBUG(...)
#endif

46
ggml-qnn/qnn-types.hpp Normal file
View file

@ -0,0 +1,46 @@
#pragma once

// Needed for uint32_t / size_t used below; previously this header relied on
// transitive includes.
#include <cstddef>
#include <cstdint>

namespace qnn {
    // =================================================================================================
    //
    // helper data type / data structure / macros / functions of
    // Qualcomm QNN(Qualcomm Neural Network, aka Qualcomm AI Engine Direct) SDK
    // ref:https://github.com/pytorch/executorch/tree/main/backends/qualcomm
    // =================================================================================================

    // Profiling verbosity passed to the QNN SDK profiler.
    enum sdk_profile_level {
        profile_off = 0,
        profile_basic = 1,
        profile_detail = 2
    };

    // Hexagon Tensor Processor architecture generations.
    enum qcom_htp_arch {
        NONE = 0,
        V68 = 68,
        V69 = 69,
        V73 = 73,
        V75 = 75,
    };

    // Qualcomm SoC model ids (values as reported by the QNN device API);
    // comments give the HTP arch of each SoC.
    enum qcom_chipset {
        UNKNOWN_SM = 0,
        SM8450 = 36, // v69
        SM8475 = 42, // v69
        SM8550 = 43, // v73
        SM8650 = 57, // v75
    };

    // Function-pointer signatures for the rpcmem shared-memory entry points
    // resolved at runtime (presumably Qualcomm's librpcmem — confirm).
    using pfn_rpc_mem_init = void (*)(void);
    using pfn_rpc_mem_deinit = void (*)(void);
    using pfn_rpc_mem_alloc = void* (*) (int, uint32_t, int);
    using pfn_rpc_mem_free = void (*)(void*);
    using pfn_rpc_mem_to_fd = int (*)(void*);

    // Detected SoC description: model id, HTP architecture and VTCM size (MB).
    struct qcom_socinfo {
        uint32_t soc_model;
        size_t htp_arch;
        size_t vtcm_size_in_mb;
    };
}

#define QNN_VER_PTR(x) (&((x).v1)) // TODO: remove this macro after we have a separate header for QNN

1139
ggml-qnn/qnn.hpp Normal file

File diff suppressed because it is too large Load diff

145
ggml-qnn/tensor.hpp Normal file
View file

@ -0,0 +1,145 @@
#pragma once
#include <cerrno>
#include <cstring>

#include "QnnTensor.h"
#include "System/QnnSystemInterface.h"

#include "backend.hpp"
#include "qnn.hpp"
namespace qnn {

// RAII binding of a ggml tensor to a QNN tensor for one graph execution:
//  - construction points the QNN tensor at the ggml tensor's data (client
//    buffer) or at shared rpcmem (NPU), uploading input data for writable
//    tensor types;
//  - destruction copies results back from rpcmem for readable tensor types
//    on the NPU and restores the QNN tensor's original dimensions pointer.
// Callers must check is_valid() after construction; on failure the object is
// inert and the destructor only restores the dimensions pointer.
template <Qnn_TensorType_t _tensorType> class ggml_qnn_tensor_readwrite {
public:
    // Creates the QNN graph tensor stored in tensor->extra and registers its
    // backing storage with the graph. On any failure, logs and marks the
    // object invalid (_context = nullptr).
    ggml_qnn_tensor_readwrite(const ggml_tensor* tensor,
                              Qnn_GraphHandle_t graph_handle,
                              ggml_backend_qnn_context* ctx)
        : _tensor(tensor),
          _qnn_tensor(reinterpret_cast<Qnn_Tensor_t*>(tensor->extra)),
          _context(ctx) {
        _old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions;
        const auto qnn_data_type = datatype_from_ggml_datatype(tensor->type);
        const bool is_npu = ctx->device == QNN_BACKEND_NPU;
        QNN_VER_PTR(*_qnn_tensor)->type = _tensorType;
        if (is_npu) {
            // NPU tensors are backed by registered rpcmem, not a client buffer.
            QNN_VER_PTR(*_qnn_tensor)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
            QNN_VER_PTR(*_qnn_tensor)->clientBuf = { .data = nullptr, .dataSize = 0 };
        }

        auto err =
            ctx->raw_interface.tensorCreateGraphTensor(graph_handle, _qnn_tensor);
        if (err != QNN_SUCCESS) {
            QNN_LOG_INFO("error = %d\n", err);
            QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor,
                          QNN_TENSOR_GET_NAME(*_qnn_tensor));
            _context = nullptr;
            return;
        }

        _dimensions[0] = (uint32_t)tensor->ne[0];
        _dimensions[1] = (uint32_t)tensor->ne[1];
        _dimensions[2] = (uint32_t)tensor->ne[2];
        _dimensions[3] = (uint32_t)tensor->ne[3];
        QNN_VER_PTR(*_qnn_tensor)->dimensions = _dimensions;
        QNN_VER_PTR(*_qnn_tensor)->rank = get_ggml_tensor_rank(tensor);
        QNN_VER_PTR(*_qnn_tensor)->dataType = qnn_data_type;

        if (is_npu) {
            // auto* sidesteps the qnn_instance vs qnn_internal::qnn_instance
            // namespace ambiguity between backend.hpp and qnn.hpp.
            auto* instance = ctx->instance;
            uint8_t* qnn_buffer = static_cast<uint8_t*>(
                instance->alloc_rpcmem(ggml_nbytes(tensor), alignof(void*)));
            if (!qnn_buffer) {
                QNN_LOG_WARN("alloc rpcmem failure, %s\n", strerror(errno));
                QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor,
                              QNN_TENSOR_GET_NAME(*_qnn_tensor));
                _context = nullptr;
                // No free for _qnn_tensor, because it's not registered.
                return;
            } else {
                QNN_LOG_INFO("alloc rpcmem successfully\n");
            }

            instance->register_rpcmem(qnn_buffer, _qnn_tensor);
            if (_tensorType == QNN_TENSOR_TYPE_APP_WRITE ||
                _tensorType == QNN_TENSOR_TYPE_APP_READWRITE) {
                // Upload input data into the shared buffer.
                memcpy(qnn_buffer, tensor->data, ggml_nbytes(tensor));
            }
        } else {
            QNN_VER_PTR(*_qnn_tensor)->clientBuf = {
                tensor->data, qnn_get_ggml_tensor_data_size(tensor) };
        }
    }

    // Rebinds an already-created QNN tensor (e.g. from a cached graph) to the
    // ggml tensor's current data, uploading input data for NPU tensors.
    ggml_qnn_tensor_readwrite(const ggml_tensor* tensor, Qnn_Tensor_t* qnn_tensor,
                              ggml_backend_qnn_context* ctx)
        : _tensor(tensor), _qnn_tensor(qnn_tensor), _context(ctx) {
        _old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions;
        // Use the same qnn:: helpers as the other constructor (the old
        // qnn_-prefixed names predate the file split).
        const auto qnn_data_type = datatype_from_ggml_datatype(tensor->type);
        const bool is_npu = ctx->device == QNN_BACKEND_NPU;

        _dimensions[0] = (uint32_t)tensor->ne[0];
        _dimensions[1] = (uint32_t)tensor->ne[1];
        _dimensions[2] = (uint32_t)tensor->ne[2];
        _dimensions[3] = (uint32_t)tensor->ne[3];
        QNN_VER_PTR(*_qnn_tensor)->dimensions = _dimensions;
        QNN_VER_PTR(*_qnn_tensor)->rank = get_ggml_tensor_rank(tensor);
        QNN_VER_PTR(*_qnn_tensor)->dataType = qnn_data_type;

        if (is_npu) {
            uint8_t* qnn_buffer =
                static_cast<uint8_t*>(ctx->instance->get_rpcmem_from_memhandle(
                    QNN_VER_PTR(*_qnn_tensor)->memHandle));
            if (qnn_buffer) {
                memcpy(qnn_buffer, tensor->data, ggml_nbytes(tensor));
            } else {
                QNN_LOG_WARN("can't find rpcmem from qnn mem handle\n");
                QNN_LOG_DEBUG("tensor%p name %s", _qnn_tensor,
                              QNN_TENSOR_GET_NAME(*_qnn_tensor));
                _context = nullptr;
                return;
            }
        } else {
            QNN_VER_PTR(*_qnn_tensor)->clientBuf = {
                tensor->data, qnn_get_ggml_tensor_data_size(tensor) };
        }
    }

    // Copies NPU results back into the ggml tensor for readable tensor types,
    // then restores the QNN tensor's original dimensions pointer.
    ~ggml_qnn_tensor_readwrite() {
        if ((_tensorType == QNN_TENSOR_TYPE_APP_READWRITE ||
             _tensorType == QNN_TENSOR_TYPE_APP_READ) &&
            _context && _context->device == QNN_BACKEND_NPU) {
            uint8_t* qnn_buffer =
                static_cast<uint8_t*>(_context->instance->get_rpcmem_from_memhandle(
                    QNN_VER_PTR(*_qnn_tensor)->memHandle));
            if (qnn_buffer) {
                memcpy(_tensor->data, qnn_buffer, ggml_nbytes(_tensor));
            } else {
                // Guard against memcpy from a null mapping; should not happen
                // for a tensor that passed construction.
                QNN_LOG_WARN("can't find rpcmem from qnn mem handle\n");
            }
        }

        QNN_VER_PTR(*_qnn_tensor)->dimensions = _old_dimensions;
    }

    // False when construction failed; the object must not be used further.
    bool is_valid() const { return _context != nullptr; }
    Qnn_Tensor_t* get_qnn_tensor() const { return _qnn_tensor; }

private:
    const ggml_tensor* _tensor;       // borrowed; must outlive this wrapper
    Qnn_Tensor_t* _qnn_tensor;        // borrowed QNN tensor (from tensor->extra or caller)
    ggml_backend_qnn_context* _context; // nullptr marks an invalid wrapper
    uint32_t* _old_dimensions;        // saved to undo the dimensions override
    uint32_t _dimensions[4] = {};     // ggml ne[] converted for QNN

    ggml_qnn_tensor_readwrite(const ggml_qnn_tensor_readwrite&) = delete;
    void operator=(const ggml_qnn_tensor_readwrite&) = delete;
    ggml_qnn_tensor_readwrite(ggml_qnn_tensor_readwrite&&) = delete;
    void operator=(ggml_qnn_tensor_readwrite&&) = delete;
};

using ggml_qnn_tensor_output =
    ggml_qnn_tensor_readwrite<QNN_TENSOR_TYPE_APP_READ>;
using ggml_qnn_tensor_input =
    ggml_qnn_tensor_readwrite<QNN_TENSOR_TYPE_APP_WRITE>;

} // namespace qnn

99
ggml-qnn/utils.hpp Normal file
View file

@ -0,0 +1,99 @@
#pragma once
#include <dlfcn.h>
#include <stdint.h>

#include "QnnTypes.h"

#include "ggml.h"

#include "qnn-types.hpp"
namespace qnn {
    // All non-template functions below live in a header, so they are marked
    // `inline` to avoid ODR violations when included from multiple
    // translation units.

    // TODO: mapping more ggml data type to QNN data type
    // ref:explanation of k-quants, https://github.com/ggerganov/llama.cpp/pull/1684
    // Maps a ggml tensor type to the matching QNN data type, or
    // QNN_DATATYPE_UNDEFINED when there is no mapping.
    inline Qnn_DataType_t datatype_from_ggml_datatype(enum ggml_type ggmltype) {
        switch (ggmltype) {
        case GGML_TYPE_F16:
            return QNN_DATATYPE_FLOAT_16;
        case GGML_TYPE_F32:
            return QNN_DATATYPE_FLOAT_32;
        case GGML_TYPE_I8:
            return QNN_DATATYPE_INT_8;
        case GGML_TYPE_Q8_0:
            return QNN_DATATYPE_SFIXED_POINT_8;
        case GGML_TYPE_Q4_0:
            return QNN_DATATYPE_SFIXED_POINT_4;
        default:
            break;
        }
        return QNN_DATATYPE_UNDEFINED;
    }

    // Counts dimensions that are neither 0 nor 1.
    // NOTE(review): an all-ones shape reports rank 0 — confirm callers expect
    // that rather than a minimum rank of 1.
    inline uint32_t get_ggml_tensor_rank(const ggml_tensor* tensor) {
        uint32_t rank = 0;
        for (int i = 0; i < GGML_MAX_DIMS; i++) {
            if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) {
                rank++;
            }
        }
        return rank;
    }

    // Human-readable name for a QNN backend id; "ggml" is the "fake" QNN
    // backend, used for compare performance between QNN backend and original GGML.
    inline const char* get_backend_name(int n_backend_type) {
        switch (n_backend_type) {
        case QNN_BACKEND_CPU:
            return "QNN-CPU";
        case QNN_BACKEND_GPU:
            return "QNN-GPU";
        case QNN_BACKEND_NPU:
            return "QNN-NPU";
        case QNN_BACKEND_GGML:
            return "ggml";
        default:
            return "unknown";
        }
    }

    // Human-readable SoC name for a qcom_chipset id.
    inline const char* get_chipset_desc(uint32_t chipset_id) {
        switch (chipset_id) {
        case SM8450:
            return "SM8450";
        case SM8475:
            return "SM8475";
        case SM8550:
            return "SM8550";
        case SM8650:
            return "SM8650";
        default:
            return "unknown";
        }
    }

    // Human-readable name for a qcom_htp_arch value.
    inline const char* get_htparch_desc(size_t htp_arch) {
        switch (htp_arch) {
        case V68:
            return "QCOM_HTP_V68";
        case V69:
            return "QCOM_HTP_V69";
        case V73:
            return "QCOM_HTP_V73";
        case V75:
            return "QCOM_HTP_V75";
        default:
            return "unknown";
        }
    }

    // Resolves `function_name` from a dlopen()ed library handle and casts it
    // to the requested function-pointer type; returns null if not found.
    template <typename Fn>
    Fn load_qnn_functionpointers(void* handle, const char* function_name) {
        return reinterpret_cast<Fn>(dlsym(handle, function_name));
    }

    // Rounds `offset` up to the next multiple of `alignment`.
    inline intptr_t align_to(size_t alignment, intptr_t offset) {
        return offset % alignment == 0
                   ? offset
                   : offset + (static_cast<intptr_t>(alignment) -
                               offset % static_cast<intptr_t>(alignment));
    }
}

View file

@ -20,6 +20,7 @@ set(SOURCE_FILES
../../ggml-alloc.c
../../ggml-backend.c
../../ggml-quants.c
../../ggml-qnn/logger.cpp
../../ggml-qnn.cpp
ggml-qnn-ut.cpp
)