review: remove unused QNN helper functions

Author: zhou.weiguo
Date:   2024-06-06 20:24:03 +08:00
Parent: dd29834c11
Commit: f4c53037ab
Signature: no known key found in database (GPG key ID: 952EA81D18BB2FA8)
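For context, every helper removed below follows the same tagged-union accessor pattern over QNN's versioned structs: check the version field, then read or write the matching union member, with a macro in front so call sites stay stable. A minimal sketch of that pattern, using simplified stand-in types rather than the real QNN SDK headers:

    #include <cstdint>
    #include <cstdio>

    // Stand-ins for the QNN SDK's versioned structs (hypothetical simplification,
    // not the real headers).
    enum Qnn_TensorVersion_t { QNN_TENSOR_VERSION_1 = 1 };

    struct Qnn_TensorV1_t {
        uint32_t     id;
        const char * name;
    };

    struct Qnn_Tensor_t {
        Qnn_TensorVersion_t version;
        Qnn_TensorV1_t      v1;   // later versions would be further union members
    };

    // The pattern: check the version tag before touching the union member.
    static inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t & tensor) {
        if (tensor.version == QNN_TENSOR_VERSION_1) {
            return tensor.v1.id;
        }
        return 0u;
    }

    // Call sites go through a macro so the accessor can change underneath.
    #define QNN_TENSOR_GET_ID(tensor) get_qnn_tensorid(tensor)

    int main() {
        Qnn_Tensor_t t = { QNN_TENSOR_VERSION_1, { 42u, "demo" } };
        printf("id = %u\n", QNN_TENSOR_GET_ID(t));
        return 0;
    }

As the diff shows, the commit keeps the reference-taking accessors that still have call sites and deletes the pointer-taking overloads plus the whole Qnn_OpConfig_t family, which per the commit title had no remaining users.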


@@ -54,6 +54,7 @@
 #include <android/log.h>
 #endif
+
 // =================================================================================================
 //
 // forward/external/helper declaration
@@ -61,6 +62,7 @@
 // =================================================================================================
 class qnn_instance;
+
 static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const char * func, int line, const char * format, ...);
@@ -74,7 +76,7 @@ static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const
 #define GGML_QNN_LOGBUF_LEN 4096
-#define GGML_QNN_DEBUG 1 //for troubleshooting QNN backend
+#define GGML_QNN_DEBUG 0 //for troubleshooting QNN backend
 #define QNN_LOG_ERROR(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
 #define QNN_LOG_WARN(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
@@ -86,6 +88,8 @@ static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const
 #define QNN_LOG_DEBUG(...)
 #endif
+
+#define QNN_VER_PTR(x) (&((x).v1))
 #define VALIDATE(value, status) \
     do { \
@@ -98,34 +102,6 @@ static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const
 #define VALIDATE_TENSOR_VERSION(tensor, err) VALIDATE(validate_tensor_version(tensor), err)
-#define VALIDATE_OP_CONFIG_VERSION(op, err) VALIDATE(validate_op_config_version(op), err)
-
-#define QNN_VER_PTR(x) (&((x).v1))
-#define QNN_OP_CFG_VALID(op_config) ((op_config).version == QNN_OPCONFIG_VERSION_1)
-#define QNN_OP_CFG_GET_NAME(op_config) get_qnn_oponfig_name(op_config)
-#define QNN_OP_CFG_GET_PACKAGE_NAME(op_config) get_qnn_op_config_packagename(op_config)
-#define QNN_OP_CFG_GET_TYPE_NAME(op_config) get_qnn_op_config_typename(op_config)
-#define QNN_OP_CFG_GET_NUM_PARAMS(op_config) get_qnn_op_config_numparams(op_config)
-#define QNN_OP_CFG_GET_PARAMS(op_config) get_qnn_op_config_params(op_config)
-#define QNN_OP_CFG_GET_NUM_INPUTS(op_config) get_qnn_op_config_numinputs(op_config)
-#define QNN_OP_CFG_GET_INPUTS(op_config) get_qnn_op_config_inputs(op_config)
-#define QNN_OP_CFG_GET_NUM_OUTPUTS(op_config) get_qnn_op_config_numoutputs(op_config)
-#define QNN_OP_CFG_GET_OUTPUTS(op_config) get_qnn_op_config_outputs(op_config)
-#define QNN_OP_CFG_SET_NAME(op_config, value) set_qnn_op_config_name(op_config, value)
-#define QNN_OP_CFG_SET_PACKAGE_NAME(op_config, value) set_qnn_op_config_packagename(op_config, value)
-#define QNN_OP_CFG_SET_TYPE_NAME(op_config, value) set_qnn_op_config_typename(op_config, value)
-#define QNN_OP_CFG_SET_PARAMS(op_config, num_of_params, params) \
-    set_qnn_op_config_params(op_config, num_of_params, params)
-#define QNN_OP_CFG_SET_INPUTS(op_config, num_of_inputs, inputTensors) \
-    set_qnn_op_config_inputs(op_config, num_of_inputs, inputTensors)
-#define QNN_OP_CFG_SET_OUTPUTS(op_config, num_of_outputs, output_tensors) \
-    set_qnn_op_config_outputs(op_config, num_of_outputs, output_tensors)
-
 #define QNN_TENSOR_GET_ID(tensor) get_qnn_tensorid(tensor)
 #define QNN_TENSOR_GET_NAME(tensor) get_qnn_tensorname(tensor)
 #define QNN_TENSOR_GET_TYPE(tensor) get_qnn_tensortype(tensor)
@@ -135,8 +111,6 @@ static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const
 #define QNN_TENSOR_GET_RANK(tensor) get_qnn_tensor_rank(tensor)
 #define QNN_TENSOR_GET_DIMENSIONS(tensor) get_qnn_tensor_dimensions(tensor)
 #define QNN_TENSOR_GET_MEM_TYPE(tensor) get_qnn_tensor_memtype(tensor)
-#define QNN_TENSOR_GET_CLIENT_BUF(tensor) get_qnn_tensor_clientbuf(tensor)
-#define QNN_TENSOR_GET_MEM_HANDLE(tensor) get_qnn_tensor_memhandle(tensor)
 
 #define QNN_TENSOR_SET_ID(tensor, value) set_qnn_tensor_id(tensor, value)
 #define QNN_TENSOR_SET_NAME(tensor, value) set_qnn_tensor_name(tensor, value)
@@ -150,7 +124,6 @@ static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const
 #define QNN_TENSOR_SET_CLIENT_BUF(tensor, value) set_qnn_tensor_clientbuf(tensor, value)
 #define QNN_TENSOR_SET_MEM_HANDLE(tensor, value) set_qnn_tensor_memhandle(tensor, value)
-
 using pfn_rpc_mem_init = void (*)(void);
 using pfn_rpc_mem_deinit = void (*)(void);
 using pfn_rpc_mem_alloc = void *(*)(int, uint32_t, int);
@@ -204,6 +177,7 @@ static int g_current_device = QNN_BACKEND_GGML;
 //Qualcomm CPU: Qualcomm Kryo CPU
 //Qualcomm GPU: Qualcomm Adreno GPU
 //Qualcomm NPU: aka HTP(Hexagon Tensor Processor), ~= cDSP(Compute DSP) + HMX(Hexagon Matrix eXtensions)/HTA(Hexagon Tensor Accelerator)
+
 static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
     [QNN_BACKEND_CPU] = {.device = 0, .threads = 1, .name = "qnn-cpu", .lib = "libQnnCpu.so", .instance = nullptr, .backend = nullptr, .raw_interface = {}, .raw_system_interface = {}},
     [QNN_BACKEND_GPU] = {.device = 1, .threads = 1, .name = "qnn-gpu", .lib = "libQnnGpu.so", .instance = nullptr, .backend = nullptr, .raw_interface = {}, .raw_system_interface = {}},
@@ -227,221 +201,6 @@ static inline int validate_tensor_version(Qnn_Tensor_t tensor) {
 }
 
-[[maybe_unused]] static inline int validate_op_config_version(Qnn_OpConfig_t op_config) {
-    if (op_config.version != QNN_OPCONFIG_VERSION_1) {
-        QNN_LOG_WARN("validate_op_config_version() op %s, got unsupported version %d\n",
-                     op_config.v1.name,
-                     op_config.version);
-        return 1;
-    }
-    return 0;
-}
-
-static inline const char * get_qnn_oponfig_name(const Qnn_OpConfig_t & op_config) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        return op_config.v1.name;
-    }
-    return nullptr;
-}
-
-[[maybe_unused]] static inline const char * get_qnn_oponfig_name(const Qnn_OpConfig_t * op_config) {
-    return get_qnn_oponfig_name(*op_config);
-}
-
-static inline const char * get_qnn_op_config_packagename(const Qnn_OpConfig_t & op_config) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        return op_config.v1.packageName;
-    }
-    return nullptr;
-}
-
-[[maybe_unused]] static inline const char * get_qnn_op_config_packagename(const Qnn_OpConfig_t * op_config) {
-    return get_qnn_op_config_packagename(*op_config);
-}
-
-static inline const char * get_qnn_op_config_typename(const Qnn_OpConfig_t & op_config) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        return op_config.v1.typeName;
-    }
-    return nullptr;
-}
-
-[[maybe_unused]] static inline const char * get_qnn_op_config_typename(const Qnn_OpConfig_t * op_config) {
-    return get_qnn_op_config_typename(*op_config);
-}
-
-static inline uint32_t get_qnn_op_config_numparams(const Qnn_OpConfig_t & op_config) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        return op_config.v1.numOfParams;
-    }
-    return 0u;
-}
-
-[[maybe_unused]] static inline uint32_t get_qnn_op_config_numparams(const Qnn_OpConfig_t * op_config) {
-    return get_qnn_op_config_numparams(*op_config);
-}
-
-static inline const Qnn_Param_t * get_qnn_op_config_params(const Qnn_OpConfig_t & op_config) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        return op_config.v1.params;
-    }
-    return nullptr;
-}
-
-[[maybe_unused]] static inline const Qnn_Param_t * get_qnn_op_config_params(const Qnn_OpConfig_t * op_config) {
-    return get_qnn_op_config_params(*op_config);
-}
-
-static inline uint32_t get_qnn_op_config_numinputs(const Qnn_OpConfig_t & op_config) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        return op_config.v1.numOfInputs;
-    }
-    return 0u;
-}
-
-[[maybe_unused]] static inline uint32_t get_qnn_op_config_numinputs(const Qnn_OpConfig_t * op_config) {
-    return get_qnn_op_config_numinputs(*op_config);
-}
-
-static inline const Qnn_Tensor_t * get_qnn_op_config_inputs(const Qnn_OpConfig_t & op_config) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        return op_config.v1.inputTensors;
-    }
-    return nullptr;
-}
-
-[[maybe_unused]] static inline const Qnn_Tensor_t * get_qnn_op_config_inputs(const Qnn_OpConfig_t * op_config) {
-    return get_qnn_op_config_inputs(*op_config);
-}
-
-static inline uint32_t get_qnn_op_config_numoutputs(const Qnn_OpConfig_t & op_config) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        return op_config.v1.numOfOutputs;
-    }
-    return 0u;
-}
-
-[[maybe_unused]] static inline uint32_t get_qnn_op_config_numoutputs(const Qnn_OpConfig_t * op_config) {
-    return get_qnn_op_config_numoutputs(*op_config);
-}
-
-static inline const Qnn_Tensor_t * get_qnn_op_config_outputs(const Qnn_OpConfig_t & op_config) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        return op_config.v1.outputTensors;
-    }
-    return nullptr;
-}
-
-[[maybe_unused]] static inline const Qnn_Tensor_t * get_qnn_op_config_outputs(const Qnn_OpConfig_t * op_config) {
-    return get_qnn_op_config_outputs(*op_config);
-}
-
-static inline void set_qnn_op_config_name(Qnn_OpConfig_t & op_config, const char * name) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        op_config.v1.name = name;
-    }
-}
-
-[[maybe_unused]] static inline void set_qnn_op_config_name(Qnn_OpConfig_t * op_config, const char * name) {
-    set_qnn_op_config_name(*op_config, name);
-}
-
-static inline void set_qnn_op_config_packagename(Qnn_OpConfig_t & op_config, const char * package_name) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        op_config.v1.packageName = package_name;
-    }
-}
-
-[[maybe_unused]] static inline void set_qnn_op_config_packagename(Qnn_OpConfig_t * op_config, const char * package_name) {
-    set_qnn_op_config_packagename(*op_config, package_name);
-}
-
-static inline void set_qnn_op_config_typename(Qnn_OpConfig_t & op_config, const char * type_name) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        op_config.v1.typeName = type_name;
-    }
-}
-
-[[maybe_unused]] static inline void set_qnn_op_config_typename(Qnn_OpConfig_t * op_config, const char * type_name) {
-    set_qnn_op_config_typename(*op_config, type_name);
-}
-
-static inline void set_qnn_op_config_params(Qnn_OpConfig_t & op_config,
-                                            uint32_t num_of_params,
-                                            Qnn_Param_t * params) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        op_config.v1.numOfParams = num_of_params;
-        op_config.v1.params = params;
-    }
-}
-
-[[maybe_unused]] static inline void set_qnn_op_config_params(Qnn_OpConfig_t * op_config,
-                                                             uint32_t num_of_params,
-                                                             Qnn_Param_t * params) {
-    set_qnn_op_config_params(*op_config, num_of_params, params);
-}
-
-static inline void set_qnn_op_config_inputs(Qnn_OpConfig_t & op_config,
-                                            uint32_t num_of_inputs,
-                                            Qnn_Tensor_t * input_tensors) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        op_config.v1.numOfInputs = num_of_inputs;
-        op_config.v1.inputTensors = input_tensors;
-    }
-}
-
-[[maybe_unused]] static inline void set_qnn_op_config_inputs(Qnn_OpConfig_t * op_config,
-                                                             uint32_t num_of_inputs,
-                                                             Qnn_Tensor_t * input_tensors) {
-    set_qnn_op_config_inputs(*op_config, num_of_inputs, input_tensors);
-}
-
-static inline void set_qnn_op_config_outputs(Qnn_OpConfig_t & op_config,
-                                             uint32_t num_of_outputs,
-                                             Qnn_Tensor_t * output_tensors) {
-    if (op_config.version == QNN_OPCONFIG_VERSION_1) {
-        op_config.v1.numOfOutputs = num_of_outputs;
-        op_config.v1.outputTensors = output_tensors;
-    }
-}
-
-[[maybe_unused]] static inline void set_qnn_op_config_outputs(Qnn_OpConfig_t * op_config,
-                                                              uint32_t num_of_outputs,
-                                                              Qnn_Tensor_t * output_tensors) {
-    set_qnn_op_config_outputs(*op_config, num_of_outputs, output_tensors);
-}
-
 static inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t & tensor) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         return tensor.v1.id;
@@ -451,11 +210,6 @@ static inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t & tensor) {
 }
 
-[[maybe_unused]] static inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensorid(*tensor);
-}
-
 static inline const char * get_qnn_tensorname(const Qnn_Tensor_t & tensor) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         return tensor.v1.name;
@@ -464,10 +218,6 @@ static inline const char * get_qnn_tensorname(const Qnn_Tensor_t & tensor) {
 }
 
-static inline const char * get_qnn_tensorname(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensorname(*tensor);
-}
-
 static inline Qnn_TensorType_t get_qnn_tensortype(const Qnn_Tensor_t & tensor) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
@@ -477,11 +227,6 @@ static inline Qnn_TensorType_t get_qnn_tensortype(const Qnn_Tensor_t & tensor) {
 }
 
-[[maybe_unused]] static inline Qnn_TensorType_t get_qnn_tensortype(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensortype(*tensor);
-}
-
 static inline Qnn_TensorDataFormat_t get_qnn_tensor_dataformat(const Qnn_Tensor_t & tensor) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         return tensor.v1.dataFormat;
@@ -490,11 +235,6 @@ static inline Qnn_TensorDataFormat_t get_qnn_tensor_dataformat(const Qnn_Tensor_
 }
 
-[[maybe_unused]] static inline Qnn_TensorDataFormat_t get_qnn_tensor_dataformat(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensor_dataformat(*tensor);
-}
-
 static inline Qnn_DataType_t get_qnn_tensor_datatype(const Qnn_Tensor_t & tensor) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         return tensor.v1.dataType;
@@ -503,11 +243,6 @@ static inline Qnn_DataType_t get_qnn_tensor_datatype(const Qnn_Tensor_t & tensor
 }
 
-[[maybe_unused]] static inline Qnn_DataType_t get_qnn_tensor_datatype(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensor_datatype(*tensor);
-}
-
 static inline Qnn_QuantizeParams_t get_qnn_tensor_quantparams(const Qnn_Tensor_t & tensor) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         return tensor.v1.quantizeParams;
@@ -516,11 +251,6 @@ static inline Qnn_QuantizeParams_t get_qnn_tensor_quantparams(const Qnn_Tensor_t
 }
 
-[[maybe_unused]] static inline Qnn_QuantizeParams_t get_qnn_tensor_quantparams(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensor_quantparams(*tensor);
-}
-
 static inline uint32_t get_qnn_tensor_rank(const Qnn_Tensor_t & tensor) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         return tensor.v1.rank;
@@ -529,11 +259,6 @@ static inline uint32_t get_qnn_tensor_rank(const Qnn_Tensor_t & tensor) {
 }
 
-[[maybe_unused]] static inline uint32_t get_qnn_tensor_rank(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensor_rank(*tensor);
-}
-
 static inline uint32_t * get_qnn_tensor_dimensions(const Qnn_Tensor_t & tensor) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         return tensor.v1.dimensions;
@@ -542,11 +267,6 @@ static inline uint32_t * get_qnn_tensor_dimensions(const Qnn_Tensor_t & tensor)
 }
 
-[[maybe_unused]] static inline uint32_t * get_qnn_tensor_dimensions(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensor_dimensions(*tensor);
-}
-
 static inline Qnn_TensorMemType_t get_qnn_tensor_memtype(const Qnn_Tensor_t & tensor) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         return tensor.v1.memType;
@@ -555,37 +275,6 @@ static inline Qnn_TensorMemType_t get_qnn_tensor_memtype(const Qnn_Tensor_t & te
 }
 
-[[maybe_unused]] static inline Qnn_TensorMemType_t get_qnn_tensor_memtype(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensor_memtype(*tensor);
-}
-
-static inline Qnn_ClientBuffer_t get_qnn_tensor_clientbuf(const Qnn_Tensor_t & tensor) {
-    if (tensor.version == QNN_TENSOR_VERSION_1) {
-        return tensor.v1.clientBuf;
-    }
-    return QNN_CLIENT_BUFFER_INIT;
-}
-
-[[maybe_unused]] static inline Qnn_ClientBuffer_t get_qnn_tensor_clientbuf(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensor_clientbuf(*tensor);
-}
-
-static inline Qnn_MemHandle_t get_qnn_tensor_memhandle(const Qnn_Tensor_t & tensor) {
-    if (tensor.version == QNN_TENSOR_VERSION_1) {
-        return tensor.v1.memHandle;
-    }
-    return nullptr;
-}
-
-[[maybe_unused]] static inline Qnn_MemHandle_t get_qnn_tensor_memhandle(const Qnn_Tensor_t * tensor) {
-    return get_qnn_tensor_memhandle(*tensor);
-}
-
 static inline void set_qnn_tensor_id(Qnn_Tensor_t & tensor, uint32_t id) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.id = id;
@@ -593,11 +282,6 @@ static inline void set_qnn_tensor_id(Qnn_Tensor_t & tensor, uint32_t id) {
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_id(Qnn_Tensor_t * tensor, uint32_t id) {
-    set_qnn_tensor_id(*tensor, id);
-}
-
 static inline void set_qnn_tensor_name(Qnn_Tensor_t & tensor, const char * name) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.name = name;
@@ -605,11 +289,6 @@ static inline void set_qnn_tensor_name(Qnn_Tensor_t & tensor, const char * name)
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_name(Qnn_Tensor_t * tensor, const char * name) {
-    set_qnn_tensor_name(*tensor, name);
-}
-
 static inline void set_qnn_tensor_type(Qnn_Tensor_t & tensor, Qnn_TensorType_t type) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.type = type;
@@ -617,11 +296,6 @@ static inline void set_qnn_tensor_type(Qnn_Tensor_t & tensor, Qnn_TensorType_t t
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_type(Qnn_Tensor_t * tensor, Qnn_TensorType_t type) {
-    set_qnn_tensor_type(*tensor, type);
-}
-
 static inline void set_qnn_tensor_dataformat(Qnn_Tensor_t & tensor, Qnn_TensorDataFormat_t format) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.dataFormat = format;
@@ -629,11 +303,6 @@ static inline void set_qnn_tensor_dataformat(Qnn_Tensor_t & tensor, Qnn_TensorDa
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_dataformat(Qnn_Tensor_t * tensor, Qnn_TensorDataFormat_t format) {
-    set_qnn_tensor_dataformat(*tensor, format);
-}
-
 static inline void set_qnn_tensor_datatype(Qnn_Tensor_t & tensor, Qnn_DataType_t dataType) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.dataType = dataType;
@@ -641,11 +310,6 @@ static inline void set_qnn_tensor_datatype(Qnn_Tensor_t & tensor, Qnn_DataType_t
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_datatype(Qnn_Tensor_t * tensor, Qnn_DataType_t dataType) {
-    set_qnn_tensor_datatype(*tensor, dataType);
-}
-
 static inline void set_qnn_tensor_quantparams(Qnn_Tensor_t & tensor, Qnn_QuantizeParams_t params) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.quantizeParams = params;
@@ -653,11 +317,6 @@ static inline void set_qnn_tensor_quantparams(Qnn_Tensor_t & tensor, Qnn_Quantiz
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_quantparams(Qnn_Tensor_t * tensor, Qnn_QuantizeParams_t params) {
-    set_qnn_tensor_quantparams(*tensor, params);
-}
-
 static inline void set_qnn_tensor_rank(Qnn_Tensor_t & tensor, uint32_t rank) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.rank = rank;
@@ -665,11 +324,6 @@ static inline void set_qnn_tensor_rank(Qnn_Tensor_t & tensor, uint32_t rank) {
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_rank(Qnn_Tensor_t * tensor, uint32_t rank) {
-    set_qnn_tensor_rank(*tensor, rank);
-}
-
 static inline void set_qnn_tensor_dimensions(Qnn_Tensor_t & tensor, uint32_t * dims) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.dimensions = dims;
@@ -677,11 +331,6 @@ static inline void set_qnn_tensor_dimensions(Qnn_Tensor_t & tensor, uint32_t * d
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_dimensions(Qnn_Tensor_t * tensor, uint32_t * dims) {
-    set_qnn_tensor_dimensions(*tensor, dims);
-}
-
 static inline void set_qnn_tensor_memtype(Qnn_Tensor_t & tensor, Qnn_TensorMemType_t memType) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.memType = memType;
@@ -689,11 +338,6 @@ static inline void set_qnn_tensor_memtype(Qnn_Tensor_t & tensor, Qnn_TensorMemTy
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_memtype(Qnn_Tensor_t * tensor, Qnn_TensorMemType_t memType) {
-    set_qnn_tensor_memtype(*tensor, memType);
-}
-
 static inline void set_qnn_tensor_clientbuf(Qnn_Tensor_t & tensor, Qnn_ClientBuffer_t clientBuf) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.clientBuf = clientBuf;
@@ -701,11 +345,6 @@ static inline void set_qnn_tensor_clientbuf(Qnn_Tensor_t & tensor, Qnn_ClientBuf
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_clientbuf(Qnn_Tensor_t * tensor, Qnn_ClientBuffer_t clientBuf) {
-    set_qnn_tensor_clientbuf(*tensor, clientBuf);
-}
-
 static inline void set_qnn_tensor_memhandle(Qnn_Tensor_t & tensor, Qnn_MemHandle_t handle) {
     if (tensor.version == QNN_TENSOR_VERSION_1) {
         tensor.v1.memHandle = handle;
@@ -713,11 +352,6 @@ static inline void set_qnn_tensor_memhandle(Qnn_Tensor_t & tensor, Qnn_MemHandle
 }
 
-[[maybe_unused]] static inline void set_qnn_tensor_memhandle(Qnn_Tensor_t * tensor, Qnn_MemHandle_t handle) {
-    set_qnn_tensor_memhandle(*tensor, handle);
-}
-
 static size_t memscpy(void * dst, size_t dstSize, const void * src, size_t copySize) {
     if (!dst || !src || !dstSize || !copySize)
         return 0;
@@ -824,19 +458,6 @@ static int free_qnn_tensor(Qnn_Tensor_t & tensor) {
 }
 
-[[maybe_unused]] static int free_qnn_tensors(Qnn_Tensor_t *& tensors, uint32_t num_tensors) {
-    int err = 0;
-
-    // free all pointer allocations in struct
-    for (size_t i = 0; i < num_tensors; i++) {
-        free_qnn_tensor(tensors[i]);
-    }
-    free(tensors);
-
-    return err;
-}
-
 static uint32_t ggml_get_tensor_rank(const ggml_tensor * tensor) {
     uint32_t rank = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
@@ -3137,7 +2758,7 @@ static const char * ggml_backend_qnn_buffer_get_name(ggml_backend_buffer_t buffe
 }
 
-[[maybe_unused]] GGML_CALL static bool ggml_backend_buffer_is_qnn(ggml_backend_buffer_t buffer) {
+GGML_CALL static bool ggml_backend_buffer_is_qnn(ggml_backend_buffer_t buffer) {
     return buffer->iface.get_name == ggml_backend_qnn_buffer_get_name;
 }
@@ -3236,15 +2857,6 @@ GGML_CALL static void ggml_backend_qnn_buffer_clear(ggml_backend_buffer_t buffer
 }
 
-[[maybe_unused]] GGML_CALL static void ggml_backend_qnn_buffer_reset(ggml_backend_buffer_t buffer) {
-    ggml_backend_qnn_buffer_context * ctx = (ggml_backend_qnn_buffer_context *) buffer->context;
-    for (auto * sub_buffer : ctx->sub_buffers) {
-        free(sub_buffer);
-    }
-    ctx->sub_buffers.clear();
-}
-
 static ggml_backend_buffer_i ggml_backend_qnn_buffer_interface = {
     /* .get_name = */ ggml_backend_qnn_buffer_get_name,
     /* .free_buffer = */ ggml_backend_qnn_buffer_free_buffer,
@@ -3402,7 +3014,7 @@ GGML_CALL static bool ggml_backend_qnn_supports_op(ggml_backend_t backend, const
 //note: this function be used with proposal/refined ggml backend subsystem in this PR:
 // https://github.com/ggerganov/llama.cpp/pull/7641
-// any ggml backend(only using system memory: ggml_backend_xxx_buffer_is_host return true)
+// new ggml backend(only using system memory: ggml_backend_xxx_buffer_is_host return true)
 // can following this style for mixed inference between CPU&GPU / CPU&NPU very easily
 GGML_CALL static bool ggml_backend_qnn_offload_op(ggml_backend_t backend, const ggml_tensor * tensor) {
     GGML_UNUSED(backend);
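
For context on the comment in the last hunk: the offload_op style means a backend whose buffers live in host memory can accept individual ops for offload while the default CPU backend executes the rest of the graph. A hedged sketch of such a predicate; the op choice, threshold, and function name are illustrative assumptions, not taken from this commit:

    #include "ggml.h"
    #include "ggml-backend.h"

    // Hypothetical offload predicate in the style the comment describes:
    // offload only large matrix multiplications, leave everything else on
    // the CPU backend, which shares the same host memory.
    GGML_CALL static bool ggml_backend_example_offload_op(ggml_backend_t backend,
                                                          const ggml_tensor * tensor) {
        GGML_UNUSED(backend);
        const size_t min_bytes = 32 * 1024;   // illustrative cutoff, not tuned
        return tensor->op == GGML_OP_MUL_MAT && ggml_nbytes(tensor) >= min_bytes;
    }

Because the buffers are host-visible, no copies are needed when an op is declined: the scheduler simply runs it on the CPU backend instead.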