move tensor-related functions to utils

hongruichen 2024-07-05 18:38:20 +08:00
parent 58cec14092
commit 0f2e68713c
3 changed files with 134 additions and 129 deletions

View file

@@ -35,13 +35,6 @@
 #include "ggml-qnn/tensor.hpp"
 #include "ggml-qnn/utils.hpp"
 
-// =================================================================================================
-//
-//  forward declaration
-//
-// =================================================================================================
-static int free_qnn_tensor(Qnn_Tensor_t &tensor);
-
 // =================================================================================================
 //
 // self-defined macro / data structure
@@ -128,7 +121,7 @@ struct ggml_backend_qnn_buffer_context {
         }
 
         for (auto *qnn_tensor : qnn_tensors) {
-            free_qnn_tensor(*qnn_tensor);
+            qnn::device_tensor_free(*qnn_tensor);
             free(qnn_tensor);
         }
@@ -156,95 +149,6 @@ struct ggml_backend_qnn_buffer_type_context {
 // QNN backend internal helper functions
 //
 // =================================================================================================
-static size_t memscpy(void *dst, size_t dst_size, const void *src, size_t copy_size) {
-    if (!dst || !src || !dst_size || !copy_size) return 0;
-
-    size_t min_size = dst_size < copy_size ? dst_size : copy_size;
-
-    memcpy(dst, src, min_size);
-
-    return min_size;
-}
-
-static int deep_copy_qnn_tensors(Qnn_Tensor_t &src, Qnn_Tensor_t &dst) {
-    int err = 0;
-    VALIDATE_TENSOR_VERSION(src, err);
-
-    dst.version = src.version;
-    QNN_TENSOR_SET_NAME(dst, ::strndup(QNN_TENSOR_GET_NAME(src), std::string(QNN_TENSOR_GET_NAME(src)).size()));
-    if (nullptr == QNN_TENSOR_GET_NAME(dst)) {
-        return 1;
-    }
-    QNN_TENSOR_SET_ID(dst, QNN_TENSOR_GET_ID(src));
-    QNN_TENSOR_SET_TYPE(dst, QNN_TENSOR_GET_TYPE(src));
-    QNN_TENSOR_SET_DATA_FORMAT(dst, QNN_TENSOR_GET_DATA_FORMAT(src));
-    QNN_TENSOR_SET_DATA_TYPE(dst, QNN_TENSOR_GET_DATA_TYPE(src));
-    QNN_TENSOR_SET_MEM_TYPE(dst, QNN_TENSOR_GET_MEM_TYPE(src));
-
-    if (QNN_TENSOR_GET_MEM_TYPE(src) == QNN_TENSORMEMTYPE_RAW) {
-        Qnn_ClientBuffer_t client_buf = { nullptr, 0 };
-        QNN_TENSOR_SET_CLIENT_BUF(dst, client_buf);
-    } else if (QNN_TENSOR_GET_MEM_TYPE(src) == QNN_TENSORMEMTYPE_MEMHANDLE) {
-        QNN_TENSOR_SET_MEM_HANDLE(dst, nullptr);
-    } else {
-        return 1;
-    }
-
-    Qnn_QuantizeParams_t src_qparam = QNN_TENSOR_GET_QUANT_PARAMS(src);
-    Qnn_QuantizationEncoding_t encoding = src_qparam.quantizationEncoding;
-    if (encoding == QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) {
-        Qnn_QuantizeParams_t src_qparam_cpy = src_qparam;
-        Qnn_AxisScaleOffset_t &axis_scale_offset = src_qparam_cpy.axisScaleOffsetEncoding;
-        Qnn_ScaleOffset_t **scaleOffset = &axis_scale_offset.scaleOffset;
-        size_t scaleOffsetSize = axis_scale_offset.numScaleOffsets * sizeof(Qnn_ScaleOffset_t);
-        *scaleOffset = (Qnn_ScaleOffset_t *)malloc(scaleOffsetSize);
-        memscpy(*scaleOffset, scaleOffsetSize, src_qparam.axisScaleOffsetEncoding.scaleOffset, scaleOffsetSize);
-        QNN_TENSOR_SET_QUANT_PARAMS(dst, src_qparam_cpy);
-    } else if (encoding == QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET) {
-        Qnn_QuantizeParams_t src_qparam_cpy = src_qparam;
-        Qnn_BwAxisScaleOffset_t &bwaxis_scale_offset = src_qparam_cpy.bwAxisScaleOffsetEncoding;
-        size_t scaleSize = bwaxis_scale_offset.numElements * sizeof(float);
-        float **scales = &bwaxis_scale_offset.scales;
-        int32_t **offsets = &bwaxis_scale_offset.offsets;
-        *scales = (float *)malloc(scaleSize);
-        memscpy(*scales, scaleSize, src_qparam.bwAxisScaleOffsetEncoding.scales, scaleSize);
-
-        if (bwaxis_scale_offset.offsets != nullptr) {
-            size_t offsetSize = bwaxis_scale_offset.numElements * sizeof(int32_t);
-            *offsets = (int32_t *)malloc(offsetSize);
-            memscpy(*offsets, offsetSize, src_qparam.bwAxisScaleOffsetEncoding.offsets, offsetSize);
-        }
-        QNN_TENSOR_SET_QUANT_PARAMS(dst, src_qparam_cpy);
-    } else {
-        QNN_TENSOR_SET_QUANT_PARAMS(dst, src_qparam);
-    }
-
-    uint32_t rank = QNN_TENSOR_GET_RANK(src);
-    QNN_TENSOR_SET_RANK(dst, rank);
-    size_t dim_size = rank * sizeof(uint32_t);
-    uint32_t *dimensions = (uint32_t *)malloc(dim_size);
-    if (dimensions == nullptr) {
-        QNN_LOG_WARN(
-            "deep_copy_qnn_tensors() allocation error while copying "
-            "tensor %s\n",
-            QNN_TENSOR_GET_NAME(src));
-        return 1;
-    }
-    memscpy(dimensions, dim_size, QNN_TENSOR_GET_DIMENSIONS(src), dim_size);
-    QNN_TENSOR_SET_DIMENSIONS(dst, dimensions);
-
-    return err;
-}
-
-static int free_qnn_tensor(Qnn_Tensor_t &tensor) {
-    int err = 0;
-    VALIDATE_TENSOR_VERSION(tensor, err);
-
-    free((void *)QNN_TENSOR_GET_NAME(tensor));
-    free(QNN_TENSOR_GET_DIMENSIONS(tensor));
-
-    return err;
-}
-
 // =================================================================================================
 //
@@ -335,9 +239,14 @@ GGML_CALL static void *ggml_backend_qnn_buffer_get_base(ggml_backend_buffer_t bu
 }
 
 GGML_CALL static void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor) {
-    Qnn_ErrorHandle_t error = QNN_SUCCESS;
     ggml_backend_qnn_buffer_context *ctx = (ggml_backend_qnn_buffer_context *)buffer->context;
 
+    Qnn_Tensor_t *p_qnn_tensor = (Qnn_Tensor_t *)calloc(1, sizeof(Qnn_Tensor_t));
+    if (!p_qnn_tensor) {
+        QNN_LOG_WARN("calloc failed");
+        return;
+    }
+
     static int idx = 0;
     char tensor_name[GGML_MAX_NAME] = { 0 };
     snprintf(tensor_name, GGML_MAX_NAME, "tensor_%04d", idx++);
@@ -352,39 +261,23 @@ GGML_CALL static void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t
     } else if (tensor->flags & GGML_TENSOR_FLAG_OUTPUT) {
         qnn_tensor_type = QNN_TENSOR_TYPE_APP_READ;
     }
-    Qnn_Tensor_t qnn_tensor = QNN_TENSOR_INIT;
 
     Qnn_TensorMemType_t qnn_mem_type = QNN_TENSORMEMTYPE_RAW;
     if (ctx->device == QNN_BACKEND_GPU) {
         qnn_mem_type = QNN_TENSORMEMTYPE_MEMHANDLE;
     }
-
-    qnn_tensor = { .version = QNN_TENSOR_VERSION_1,
-                   { .v1 = {
-                         .id = 0,
-                         .name = tensor_name,
-                         .type = qnn_tensor_type,
-                         .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER,
-                         .dataType = qnn_data_type,
-                         .quantizeParams = { QNN_DEFINITION_UNDEFINED,
-                                             QNN_QUANTIZATION_ENCODING_UNDEFINED,
-                                             { .scaleOffsetEncoding = { .scale = 0.0000000000000000f, .offset = 0 } } },
-                         .rank = qnn::get_ggml_tensor_rank(tensor),
-                         .dimensions = dimensions,
-                         .memType = qnn_mem_type,
-                         { .clientBuf = { .data = nullptr, .dataSize = 0 } } } } };
-
-    Qnn_Tensor_t *p_qnn_tensor = (Qnn_Tensor_t *)calloc(1, sizeof(Qnn_Tensor_t));
-    if (nullptr == p_qnn_tensor) {
-        QNN_LOG_WARN("calloc failed");
-        return;
-    }
-
-    error = deep_copy_qnn_tensors(qnn_tensor, *p_qnn_tensor);
+    Qnn_Tensor_t qnn_tensor;
+    qnn::device_tensor_init(qnn_tensor, qnn::get_ggml_tensor_rank(tensor), qnn_mem_type, tensor_name, qnn_tensor_type,
+                            qnn_data_type, dimensions);
+
+    Qnn_ErrorHandle_t error = qnn::device_tensor_deep_copy(qnn_tensor, *p_qnn_tensor);
     if (error != QNN_SUCCESS) {
         free(p_qnn_tensor);
         QNN_LOG_WARN("init tensor failed");
         return;
     }
     tensor->extra = p_qnn_tensor;
     ctx->qnn_tensors.push_back(p_qnn_tensor);
 }
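Note: taken together, the hunks above swap the file-local static helpers for the new qnn::device_tensor_* API. A minimal sketch of the resulting lifecycle, assuming the QNN SDK headers and ggml-qnn/utils.hpp are on the include path (make_tensor_copy is a hypothetical name, not part of this commit):

// Illustration only: init a stack tensor, deep-copy it to the heap,
// release it later with device_tensor_free + free.
#include <cstdlib>

#include "QnnTypes.h"
#include "ggml-qnn/utils.hpp"

static Qnn_Tensor_t *make_tensor_copy(uint32_t rank, uint32_t *dims, const char *name) {
    Qnn_Tensor_t src;
    qnn::device_tensor_init(src, rank, QNN_TENSORMEMTYPE_RAW, name,
                            QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_32, dims);

    // Heap copy that outlives this scope, mirroring ggml_backend_qnn_buffer_init_tensor.
    Qnn_Tensor_t *copy = (Qnn_Tensor_t *)calloc(1, sizeof(Qnn_Tensor_t));
    if (!copy) return nullptr;
    if (qnn::device_tensor_deep_copy(src, *copy) != QNN_SUCCESS) {
        free(copy);
        return nullptr;
    }
    return copy; // caller: qnn::device_tensor_free(*copy); free(copy);
}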

View file

@@ -5,6 +5,20 @@
 #include "qnn-types.hpp"
 
+namespace {
+
+size_t memscpy(void *dst, size_t dst_size, const void *src, size_t copy_size) {
+    if (!dst || !src || !dst_size || !copy_size) return 0;
+
+    size_t min_size = dst_size < copy_size ? dst_size : copy_size;
+
+    memcpy(dst, src, min_size);
+
+    return min_size;
+}
+
+} // namespace
+
 namespace qnn {
 
 // TODO: mapping more ggml data type to QNN data type
@@ -121,4 +135,105 @@ const char *opname_from_ggmlop(enum ggml_op ggmlop) {
     return nullptr;
 }
 
+void device_tensor_init(Qnn_Tensor_t &tensor, uint32_t rank, Qnn_TensorMemType_t mem_type, const char *tensor_name,
+                        Qnn_TensorType_t qnn_tensor_type, Qnn_DataType_t qnn_data_type, uint32_t *dimensions) {
+    tensor = QNN_TENSOR_INIT;
+    tensor = { .version = QNN_TENSOR_VERSION_1,
+               { .v1 = { .id = 0,
+                         .name = tensor_name,
+                         .type = qnn_tensor_type,
+                         .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER,
+                         .dataType = qnn_data_type,
+                         .quantizeParams = { QNN_DEFINITION_UNDEFINED,
+                                             QNN_QUANTIZATION_ENCODING_UNDEFINED,
+                                             { .scaleOffsetEncoding = { .scale = 0.0000000000000000f, .offset = 0 } } },
+                         .rank = rank,
+                         .dimensions = dimensions,
+                         .memType = mem_type,
+                         { .clientBuf = {} } } } };
+}
+
+Qnn_ErrorHandle_t device_tensor_deep_copy(const Qnn_Tensor_t &src, Qnn_Tensor_t &dst) {
+    Qnn_ErrorHandle_t err = validate_tensor_version(src);
+    if (err != QNN_SUCCESS) {
+        QNN_LOG_WARN("validate_tensor_version expected QNN_SUCCESS\n");
+        return err;
+    }
+
+    dst.version = src.version;
+    QNN_TENSOR_SET_NAME(dst, ::strndup(QNN_TENSOR_GET_NAME(src), std::string(QNN_TENSOR_GET_NAME(src)).size()));
+    if (nullptr == QNN_TENSOR_GET_NAME(dst)) {
+        return (Qnn_ErrorHandle_t)1;
+    }
+    QNN_TENSOR_SET_ID(dst, QNN_TENSOR_GET_ID(src));
+    QNN_TENSOR_SET_TYPE(dst, QNN_TENSOR_GET_TYPE(src));
+    QNN_TENSOR_SET_DATA_FORMAT(dst, QNN_TENSOR_GET_DATA_FORMAT(src));
+    QNN_TENSOR_SET_DATA_TYPE(dst, QNN_TENSOR_GET_DATA_TYPE(src));
+    QNN_TENSOR_SET_MEM_TYPE(dst, QNN_TENSOR_GET_MEM_TYPE(src));
+
+    if (QNN_TENSOR_GET_MEM_TYPE(src) == QNN_TENSORMEMTYPE_RAW) {
+        Qnn_ClientBuffer_t client_buf = { nullptr, 0 };
+        QNN_TENSOR_SET_CLIENT_BUF(dst, client_buf);
+    } else if (QNN_TENSOR_GET_MEM_TYPE(src) == QNN_TENSORMEMTYPE_MEMHANDLE) {
+        QNN_TENSOR_SET_MEM_HANDLE(dst, nullptr);
+    } else {
+        return (Qnn_ErrorHandle_t)1;
+    }
+
+    Qnn_QuantizeParams_t src_qparam = QNN_TENSOR_GET_QUANT_PARAMS(src);
+    Qnn_QuantizationEncoding_t encoding = src_qparam.quantizationEncoding;
+    if (encoding == QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) {
+        Qnn_QuantizeParams_t src_qparam_cpy = src_qparam;
+        Qnn_AxisScaleOffset_t &axis_scale_offset = src_qparam_cpy.axisScaleOffsetEncoding;
+        Qnn_ScaleOffset_t **scaleOffset = &axis_scale_offset.scaleOffset;
+        size_t scaleOffsetSize = axis_scale_offset.numScaleOffsets * sizeof(Qnn_ScaleOffset_t);
+        *scaleOffset = (Qnn_ScaleOffset_t *)malloc(scaleOffsetSize);
+        memscpy(*scaleOffset, scaleOffsetSize, src_qparam.axisScaleOffsetEncoding.scaleOffset, scaleOffsetSize);
+        QNN_TENSOR_SET_QUANT_PARAMS(dst, src_qparam_cpy);
+    } else if (encoding == QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET) {
+        Qnn_QuantizeParams_t src_qparam_cpy = src_qparam;
+        Qnn_BwAxisScaleOffset_t &bwaxis_scale_offset = src_qparam_cpy.bwAxisScaleOffsetEncoding;
+        size_t scaleSize = bwaxis_scale_offset.numElements * sizeof(float);
+        float **scales = &bwaxis_scale_offset.scales;
+        int32_t **offsets = &bwaxis_scale_offset.offsets;
+        *scales = (float *)malloc(scaleSize);
+        memscpy(*scales, scaleSize, src_qparam.bwAxisScaleOffsetEncoding.scales, scaleSize);
+
+        if (bwaxis_scale_offset.offsets != nullptr) {
+            size_t offsetSize = bwaxis_scale_offset.numElements * sizeof(int32_t);
+            *offsets = (int32_t *)malloc(offsetSize);
+            memscpy(*offsets, offsetSize, src_qparam.bwAxisScaleOffsetEncoding.offsets, offsetSize);
+        }
+        QNN_TENSOR_SET_QUANT_PARAMS(dst, src_qparam_cpy);
+    } else {
+        QNN_TENSOR_SET_QUANT_PARAMS(dst, src_qparam);
+    }
+
+    uint32_t rank = QNN_TENSOR_GET_RANK(src);
+    QNN_TENSOR_SET_RANK(dst, rank);
+    size_t dim_size = rank * sizeof(uint32_t);
+    uint32_t *dimensions = (uint32_t *)malloc(dim_size);
+    if (dimensions == nullptr) {
+        QNN_LOG_WARN(
+            "deep_copy_qnn_tensors() allocation error while copying "
+            "tensor %s\n",
+            QNN_TENSOR_GET_NAME(src));
+        return (Qnn_ErrorHandle_t)1;
+    }
+    memscpy(dimensions, dim_size, QNN_TENSOR_GET_DIMENSIONS(src), dim_size);
+    QNN_TENSOR_SET_DIMENSIONS(dst, dimensions);
+
+    return err;
+}
+
+void device_tensor_free(Qnn_Tensor_t &tensor) {
+    if (validate_tensor_version(tensor) != QNN_SUCCESS) {
+        QNN_LOG_WARN("validate_tensor_version expected QNN_SUCCESS\n");
+        return;
+    }
+
+    free((void *)QNN_TENSOR_GET_NAME(tensor));
+    free(QNN_TENSOR_GET_DIMENSIONS(tensor));
+}
+
 } // namespace qnn
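Note: memscpy is a bounds-clamped memcpy; it copies at most dst_size bytes and reports how many it actually copied. A standalone illustration of that contract (the real helper sits in an anonymous namespace inside the .cpp above and is not visible elsewhere):

// Illustration only, not part of the diff.
#include <cstdio>
#include <cstring>

static size_t memscpy(void *dst, size_t dst_size, const void *src, size_t copy_size) {
    if (!dst || !src || !dst_size || !copy_size) return 0;
    size_t min_size = dst_size < copy_size ? dst_size : copy_size;
    memcpy(dst, src, min_size);
    return min_size;
}

int main() {
    char dst[4];
    const char src[8] = "abcdefg";
    // copy_size (8) exceeds dst_size (4), so only 4 bytes land in dst.
    printf("%zu\n", memscpy(dst, sizeof(dst), src, sizeof(src))); // prints 4
    return 0;
}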

View file

@@ -169,6 +169,13 @@ inline void set_qnn_tensor_memhandle(Qnn_Tensor_t &tensor, Qnn_MemHandle_t handl
     }
 }
 
+void device_tensor_init(Qnn_Tensor_t &tensor, uint32_t rank, Qnn_TensorMemType_t mem_type, const char *tensor_name,
+                        Qnn_TensorType_t qnn_tensor_type, Qnn_DataType_t qnn_data_type, uint32_t *dimensions);
+
+Qnn_ErrorHandle_t device_tensor_deep_copy(const Qnn_Tensor_t &src, Qnn_Tensor_t &dst);
+
+void device_tensor_free(Qnn_Tensor_t &tensor);
+
 #if ENABLE_QNNBACKEND_PERF
 class qnn_perf {
 public:
@@ -206,15 +213,6 @@ public:
 
 } // namespace qnn
 
-#define VALIDATE(value, status)                                \
-    do {                                                       \
-        status = value;                                        \
-        if (status != QNN_SUCCESS) {                           \
-            QNN_LOG_WARN("%s expected QNN_SUCCESS\n", #value); \
-            return status;                                     \
-        }                                                      \
-    } while (0)
-
 #define QNN_TENSOR_GET_ID(tensor) qnn::get_qnn_tensorid(tensor)
 #define QNN_TENSOR_GET_NAME(tensor) qnn::get_qnn_tensorname(tensor)
 #define QNN_TENSOR_GET_TYPE(tensor) qnn::get_qnn_tensortype(tensor)
@@ -236,4 +234,3 @@ public:
 #define QNN_TENSOR_SET_MEM_TYPE(tensor, value) qnn::set_qnn_tensor_memtype(tensor, value)
 #define QNN_TENSOR_SET_CLIENT_BUF(tensor, value) qnn::set_qnn_tensor_clientbuf(tensor, value)
 #define QNN_TENSOR_SET_MEM_HANDLE(tensor, value) qnn::set_qnn_tensor_memhandle(tensor, value)
-#define VALIDATE_TENSOR_VERSION(tensor, err) VALIDATE(qnn::validate_tensor_version(tensor), err)
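Note: the removed VALIDATE / VALIDATE_TENSOR_VERSION macros hid an early return inside a statement; the new utils functions spell the same check out in plain control flow. A hedged sketch of the replacement pattern (check_tensor is a hypothetical name, and QNN_LOG_WARN is assumed visible through the project's logging header):

// Old: int err = 0; VALIDATE_TENSOR_VERSION(tensor, err);  // early-returns on failure
// New: call qnn::validate_tensor_version directly and handle the result explicitly.
#include "QnnTypes.h"

#include "ggml-qnn/utils.hpp"

static Qnn_ErrorHandle_t check_tensor(const Qnn_Tensor_t &tensor) {
    Qnn_ErrorHandle_t err = qnn::validate_tensor_version(tensor);
    if (err != QNN_SUCCESS) {
        QNN_LOG_WARN("validate_tensor_version expected QNN_SUCCESS\n");
    }
    return err; // caller decides whether to bail out
}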