From 4b0f6b0cd6f24b16a2fc8022345161811c01bcc2 Mon Sep 17 00:00:00 2001 From: hongruichen Date: Fri, 5 Jul 2024 19:34:56 +0800 Subject: [PATCH] add helper function to get Qnn_TensorType_t from ggml_tensor --- ggml/src/ggml-qnn.cpp | 16 ++++------------ ggml/src/ggml-qnn/tensor.hpp | 4 ++-- ggml/src/ggml-qnn/utils.cpp | 16 ++++++++++++++-- ggml/src/ggml-qnn/utils.hpp | 4 +++- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/ggml/src/ggml-qnn.cpp b/ggml/src/ggml-qnn.cpp index d6feea043..632ce8ee5 100644 --- a/ggml/src/ggml-qnn.cpp +++ b/ggml/src/ggml-qnn.cpp @@ -250,23 +250,15 @@ GGML_CALL static void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t static int idx = 0; char tensor_name[GGML_MAX_NAME] = { 0 }; snprintf(tensor_name, GGML_MAX_NAME, "tensor_%04d", idx++); - - uint32_t dimensions[] = { (uint32_t)tensor->ne[0], (uint32_t)tensor->ne[1], (uint32_t)tensor->ne[2], - (uint32_t)tensor->ne[3] }; - Qnn_DataType_t qnn_data_type = qnn::datatype_from_ggml_datatype(tensor->type); - Qnn_TensorType_t qnn_tensor_type = QNN_TENSOR_TYPE_APP_WRITE; - - if (tensor->flags & GGML_TENSOR_FLAG_INPUT) { - qnn_tensor_type = QNN_TENSOR_TYPE_APP_WRITE; - } else if (tensor->flags & GGML_TENSOR_FLAG_OUTPUT) { - qnn_tensor_type = QNN_TENSOR_TYPE_APP_READ; - } - + Qnn_DataType_t qnn_data_type = qnn::device_datatype_from_ggml_datatype(tensor->type); + Qnn_TensorType_t qnn_tensor_type = qnn::device_tensortype_from_ggml_tensor(tensor); Qnn_TensorMemType_t qnn_mem_type = QNN_TENSORMEMTYPE_RAW; if (ctx->device == QNN_BACKEND_GPU) { qnn_mem_type = QNN_TENSORMEMTYPE_MEMHANDLE; } + uint32_t dimensions[] = { (uint32_t)tensor->ne[0], (uint32_t)tensor->ne[1], (uint32_t)tensor->ne[2], + (uint32_t)tensor->ne[3] }; Qnn_Tensor_t qnn_tensor; qnn::device_tensor_init(qnn_tensor, qnn::get_ggml_tensor_rank(tensor), qnn_mem_type, tensor_name, qnn_tensor_type, qnn_data_type, dimensions); diff --git a/ggml/src/ggml-qnn/tensor.hpp b/ggml/src/ggml-qnn/tensor.hpp index 0ec75c03f..8a9196616 
100644 --- a/ggml/src/ggml-qnn/tensor.hpp +++ b/ggml/src/ggml-qnn/tensor.hpp @@ -17,7 +17,7 @@ public: ggml_backend_qnn_context *ctx) : _tensor(tensor), _qnn_tensor(reinterpret_cast<Qnn_Tensor_t *>(tensor->extra)), _context(ctx) { _old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions; - const auto qnn_data_type = datatype_from_ggml_datatype(tensor->type); + const auto qnn_data_type = device_datatype_from_ggml_datatype(tensor->type); const bool is_npu = ctx->device == QNN_BACKEND_NPU; QNN_VER_PTR(*_qnn_tensor)->type = _tensorType; if (is_npu) { @@ -67,7 +67,7 @@ public: ggml_backend_qnn_context *ctx) : _tensor(tensor), _qnn_tensor(qnn_tensor), _context(ctx) { _old_dimensions = QNN_VER_PTR(*_qnn_tensor)->dimensions; - const auto qnn_data_type = qnn::datatype_from_ggml_datatype(tensor->type); + const auto qnn_data_type = device_datatype_from_ggml_datatype(tensor->type); const bool is_npu = ctx->device == QNN_BACKEND_NPU; _dimensions[0] = (uint32_t)tensor->ne[0]; diff --git a/ggml/src/ggml-qnn/utils.cpp b/ggml/src/ggml-qnn/utils.cpp index 89982449a..7c25314f7 100644 --- a/ggml/src/ggml-qnn/utils.cpp +++ b/ggml/src/ggml-qnn/utils.cpp @@ -23,8 +23,8 @@ namespace qnn { // TODO: mapping more ggml data type to QNN data type // ref:explanation of k-quants, https://github.com/ggerganov/llama.cpp/pull/1684 -Qnn_DataType_t datatype_from_ggml_datatype(enum ggml_type ggmltype) { - switch (ggmltype) { +Qnn_DataType_t device_datatype_from_ggml_datatype(ggml_type ggml_type) { + switch (ggml_type) { case GGML_TYPE_F16: return QNN_DATATYPE_FLOAT_16; case GGML_TYPE_F32: @@ -41,6 +41,18 @@ Qnn_DataType_t datatype_from_ggml_datatype(enum ggml_type ggmltype) { return QNN_DATATYPE_UNDEFINED; } +Qnn_TensorType_t device_tensortype_from_ggml_tensor(ggml_tensor *ggml_tensor) { + Qnn_TensorType_t qnn_tensor_type = QNN_TENSOR_TYPE_APP_WRITE; + + if (ggml_tensor->flags & GGML_TENSOR_FLAG_INPUT) { + qnn_tensor_type = QNN_TENSOR_TYPE_APP_WRITE; + } else if (ggml_tensor->flags & GGML_TENSOR_FLAG_OUTPUT) { + 
qnn_tensor_type = QNN_TENSOR_TYPE_APP_READ; + } + + return qnn_tensor_type; +} + uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor) { uint32_t rank = 0; for (int i = 0; i < GGML_MAX_DIMS; i++) { diff --git a/ggml/src/ggml-qnn/utils.hpp b/ggml/src/ggml-qnn/utils.hpp index aa824379a..87d908f1e 100644 --- a/ggml/src/ggml-qnn/utils.hpp +++ b/ggml/src/ggml-qnn/utils.hpp @@ -15,7 +15,6 @@ namespace qnn { -Qnn_DataType_t datatype_from_ggml_datatype(enum ggml_type ggmltype); uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor); const char *get_backend_name(int n_backend_type); const char *get_chipset_desc(uint32_t chipset_id); @@ -169,6 +168,9 @@ inline void set_qnn_tensor_memhandle(Qnn_Tensor_t &tensor, Qnn_MemHandle_t handl } } +Qnn_DataType_t device_datatype_from_ggml_datatype(ggml_type ggml_type); +Qnn_TensorType_t device_tensortype_from_ggml_tensor(ggml_tensor *ggml_tensor); + void device_tensor_init(Qnn_Tensor_t &tensor, uint32_t rank, Qnn_TensorMemType_t mem_type, const char *tensor_name, Qnn_TensorType_t qnn_tensor_type, Qnn_DataType_t qnn_data_type, uint32_t *dimensions);