npu: probe htp info and capacity of rpc ion memory

This commit is contained in:
zhou.weiguo 2024-06-09 23:49:54 +08:00
parent 3e8b61f970
commit d38d4a67d1
No known key found for this signature in database
GPG key ID: 952EA81D18BB2FA8

View file

@ -152,6 +152,28 @@ enum class ggml_qnn_profile_level {
profile_detail = 2
};
// Qualcomm HTP architecture versions known to this backend.
// Each enumerator's numeric value equals the version number in its name
// (V68 == 68, ...), so a value can be printed directly as the version.
// NOTE(review): NONE presumably means "unknown / not probed" - confirm against callers.
enum qcom_htp_arch {
NONE = 0,
V68 = 68,
V69 = 69,
V73 = 73,
V75 = 75,
};
// Qualcomm SoC model identifiers recognised by this backend.
// The values are also used as indices into g_qnn_soc_info_table and are
// matched against the soc model id passed to qnn_get_chipset_desc().
// The trailing comment on each entry is the HTP architecture version of that chipset.
// NOTE(review): the numeric ids presumably mirror the soc model ids reported by
// the QNN SDK - confirm against the SDK headers.
enum qcom_chipset {
UNKNOWN_SM = 0,
SM8450 = 36, // v69
SM8475 = 42, // v69
SM8550 = 43, // v73
SM8650 = 57, // v75
};
// Static description of one Qualcomm SoC, used as the element type of
// g_qnn_soc_info_table.
struct qcom_socinfo {
int soc_model;        // SoC model id; the table stores qcom_chipset values here
int htp_arch;         // HTP architecture; the table stores qcom_htp_arch values here
int vtcm_size_in_mb;  // VTCM size, in MB
};
struct ggml_backend_qnn_context {
int device;
int threads;
@ -216,6 +238,29 @@ static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
.raw_system_interface = {}},
};
// Per-SoC information table, indexed directly by qcom_chipset value.
// Because array designators are used, the array is sparse: it spans indices
// 0..SM8650 and every slot not listed below is zero-initialized
// (soc_model == 0, htp_arch == 0, vtcm_size_in_mb == 0).
// NOTE(review): array designators (`[SM8450] = ...`) are C99 syntax that C++
// compilers accept only as an extension - confirm every supported toolchain
// builds this.
static struct qcom_socinfo g_qnn_soc_info_table[] = {
/* Qualcomm Snapdragon 8 Gen 1 */
[SM8450] = {.soc_model = SM8450,
.htp_arch = V69,
.vtcm_size_in_mb = 8},
/* Qualcomm Snapdragon 8+ Gen 1 */
[SM8475] = {.soc_model = SM8475,
.htp_arch = V69,
.vtcm_size_in_mb = 8},
/* Qualcomm Snapdragon 8 Gen 2 */
[SM8550] = {.soc_model = SM8550,
.htp_arch = V73,
.vtcm_size_in_mb = 8},
/* Qualcomm Snapdragon 8 Gen 3 */
[SM8650] = {.soc_model = SM8650,
.htp_arch = V75,
.vtcm_size_in_mb = 8},
};
// =================================================================================================
//
// QNN helper functions and other internal helper functions
@ -485,6 +530,8 @@ static Qnn_DataType_t qnn_datatype_from_ggml_datatype(enum ggml_type ggmltype) {
return QNN_DATATYPE_INT_8;
case GGML_TYPE_Q8_0:
return QNN_DATATYPE_SFIXED_POINT_8;
case GGML_TYPE_Q4_0:
return QNN_DATATYPE_SFIXED_POINT_4;
default:
break;
}
@ -527,19 +574,34 @@ Fn load_qnn_functionpointers(void * handle, const char * function_name) {
// Translate a QNN backend type id into a printable backend name.
//
// n_backend_type: one of the QNN_BACKEND_* ids.
// Returns a pointer to a static string: "QNN-CPU", "QNN-GPU", "QNN-NPU" or
// "ggml"; any other value yields "unknown". The result must not be freed.
//
// Only the named QNN_BACKEND_* constants are used as case labels. Listing the
// raw literals 0..3 next to them would repeat the same case value twice (a
// compile error) whenever the constants carry those values, and would hide
// the meaning of each label otherwise.
static const char * get_qnn_backend_name(int n_backend_type) {
    switch (n_backend_type) {
        case QNN_BACKEND_CPU:
            return "QNN-CPU";
        case QNN_BACKEND_GPU:
            return "QNN-GPU";
        case QNN_BACKEND_NPU:
            return "QNN-NPU";
        case QNN_BACKEND_GGML:
            // "fake" QNN backend: runs the original GGML path so its performance
            // can be compared against the real QNN backends
            return "ggml";
        default:
            return "unknown";
    }
}
// Look up the printable name of a Qualcomm chipset.
//
// chipset_id: SoC model id, compared against the qcom_chipset enumerators.
// Returns a pointer to a static string with the chipset name, or "unknown"
// when the id matches none of the chipsets listed here.
static const char * qnn_get_chipset_desc(uint32_t chipset_id) {
    // id -> name pairs for every chipset this backend can describe
    static const struct {
        uint32_t     id;
        const char * name;
    } known_chipsets[] = {
        {SM8450, "SM8450"},
        {SM8475, "SM8475"},
        {SM8550, "SM8550"},
        {SM8650, "SM8650"},
    };

    for (const auto & entry : known_chipsets) {
        if (entry.id == chipset_id) {
            return entry.name;
        }
    }
    return "unknown";
}
static intptr_t align_to(size_t alignment, intptr_t offset) {
return offset % alignment == 0
? offset
@ -893,6 +955,8 @@ class qnn_instance {
void free_rpcmem(void * buf);
// Returns the capacity of rpc ion memory in MB: the default stored in
// _rpcmem_capacity, or the larger value found by the allocation probe that
// qnn_init() runs for the Htp backend.
size_t get_rpcmem_capacity() { return _rpcmem_capacity; }
bool is_rpcmem_allocated(void * buf);
bool is_rpcmem_registered(Qnn_MemHandle_t handle) {
@ -977,6 +1041,7 @@ class qnn_instance {
pfn_rpc_mem_init _pfn_rpc_mem_init;
pfn_rpc_mem_deinit _pfn_rpc_mem_deinit;
std::unordered_map<void *, void *> _rpcmem_store_map;
// Capacity of rpc ion memory in MB. Starts at 512 and is only ever raised,
// by qnn_init(), when a larger probe allocation succeeds.
size_t _rpcmem_capacity = 512;
std::string _graph_name;
};
@ -1493,6 +1558,46 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
QNN_LOG_DEBUG("initialize qnn context successfully\n");
}
if (_backend_name.find("Htp") != std::variant_npos) {
const QnnDevice_PlatformInfo_t * p_info = nullptr;
_qnn_raw_interface.deviceGetPlatformInfo(nullptr, &p_info);
QNN_LOG_INFO("device counts %d", p_info->v1.numHwDevices);
QnnDevice_HardwareDeviceInfo_t * infos = p_info->v1.hwDevices;
for (int i = 0; i < p_info->v1.numHwDevices; i++) {
QNN_LOG_INFO("deviceID:%d, deviceType:%d, numCores %d", infos[i].v1.deviceId,
infos[i].v1.deviceType, infos[i].v1.numCores);
QnnDevice_DeviceInfoExtension_t devinfo = infos[i].v1.deviceInfoExtension;
QnnHtpDevice_OnChipDeviceInfoExtension_t chipinfo = devinfo->onChipDevice;
QnnHtpDevice_Arch_t chiparch = chipinfo.arch;
QNN_LOG_INFO("htp_type:%d(%s)", devinfo->devType, (devinfo->devType == QNN_HTP_DEVICE_TYPE_ON_CHIP) ? "ON_CHIP" : "");
QNN_LOG_INFO("qualcomm soc_model:%d(%s), htp_arch:%d, vtcm_size_in_mb:%d MB", chipinfo.socModel,
qnn_get_chipset_desc(chipinfo.socModel), chiparch, chipinfo.vtcmSize);
}
_qnn_raw_interface.deviceFreePlatformInfo(nullptr, p_info);
//TODO: faster approach to probe the accurate capacity of rpc ion memory
size_t candidate_size = 0;
uint8_t * rpc_buffer = nullptr;
const int SIZE_IN_MB = (1 << 20);
size_t probe_slots[] = {1024, 1536, 2048 - 48, 2048};
size_t probe_counts = sizeof(probe_slots) / sizeof(size_t);
for (size_t idx = 0; idx < probe_counts; idx++) {
rpc_buffer = static_cast<uint8_t *>(alloc_rpcmem(probe_slots[idx] * SIZE_IN_MB, 4));
if (nullptr == rpc_buffer) {
QNN_LOG_INFO("alloc rpcmem %d (MB) failure, %s\n", probe_slots[idx], strerror(errno));
break;
} else {
candidate_size = probe_slots[idx];
free_rpcmem(rpc_buffer);
rpc_buffer = nullptr;
}
}
if (candidate_size > _rpcmem_capacity)
_rpcmem_capacity = candidate_size;
QNN_LOG_INFO("capacity of rpc ion memory %d MB\n", _rpcmem_capacity);
}
QNN_LOG_DEBUG("leave qni_init\n");
return 0;
@ -1654,10 +1759,12 @@ static bool ggml_qnn_can_handle_op(ggml_backend_qnn_context *ctx,
const int64_t ne20 = tensor->ne[0];
const int64_t ne21 = tensor->ne[1];
//TODO: support other quatinized data type
if (ggml_is_quantized(src0->type) && (src0->type != GGML_TYPE_Q8_0)) {
//TODO: support other quantized data type
if (ggml_is_quantized(src0->type)) {
if ((src0->type != GGML_TYPE_Q8_0) && (src0->type != GGML_TYPE_Q4_0)) {
return false;
}
}
if (b_dump_tensor_info) {
if (tensor->op == GGML_OP_MUL_MAT) {