From e05a398fb3abb9dd94af4fccdcd1cf6b082829d3 Mon Sep 17 00:00:00 2001
From: shanshan shen
Date: Tue, 26 Nov 2024 07:32:39 +0000
Subject: [PATCH] restore some modifications

---
 ggml/src/ggml-cann/aclnn_ops.cpp | 13 ++++++-------
 ggml/src/ggml-cann/ggml-cann.cpp |  8 +++++---
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp
index 25ffd9126..881e50ac8 100644
--- a/ggml/src/ggml-cann/aclnn_ops.cpp
+++ b/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -2427,6 +2427,7 @@ static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
                           aclTensor* acl_weight, aclTensor* acl_dst) {
     int8_t cube_math_type = 1;  // ALLOW_FP32_DOWN_PRECISION, when input is
                                 // fp32, atlas a2 will transpose it to HFLOAT32.
+
     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
     void* workspaceAddr = nullptr;
@@ -2531,7 +2532,7 @@ static void aclnn_mat_mul_3d(ggml_backend_cann_context& ctx, aclTensor* acl_inpu
  * multiplication will be stored.
  */
 static void ggml_cann_mat_mul_fp(ggml_backend_cann_context& ctx,
-                                  ggml_tensor* dst) {
+                                 ggml_tensor* dst) {
     ggml_tensor* weight = dst->src[0];  // weight
     ggml_tensor* input = dst->src[1];   // input
 
@@ -2596,8 +2597,8 @@ static void ggml_cann_mat_mul_fp(ggml_backend_cann_context& ctx,
  * multiplication will be stored.
  */
 static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
-                                     ggml_tensor* dst,
-                                     const enum ggml_type type) {
+                                    ggml_tensor* dst,
+                                    const enum ggml_type type) {
     ggml_tensor* src0 = dst->src[0];  // weight
     ggml_tensor* src1 = dst->src[1];  // input
 
@@ -2617,8 +2618,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
     size_t weight_stride = src0->ne[1] * src0->ne[0] * weight_elem_size;
     size_t weight_size = weight_stride * src0->ne[2] * src0->ne[3];
 
-    // scale stored at the end of weight.
-    // scale need transpose.
+    // scale stored at the end of weight. Also need transpose.
     size_t scale_elem_size = sizeof(uint16_t);
     size_t scale_nb[] = {src0->ne[0] / QK8_0 * scale_elem_size, scale_elem_size};
     size_t scale_stride = src0->ne[1] * src0->ne[0] / QK8_0 * scale_elem_size;
@@ -2677,8 +2677,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
             int64_t batch0 = (n0 * src0->ne[2]) + c0;
 
             aclTensor* acl_input_tensor = ggml_cann_create_tensor(
-                (char*)input_buffer + batch1 * input_stride,
-                ACL_FLOAT16,
+                (char*)input_buffer + batch1 * input_stride, ACL_FLOAT16,
                 input_elem_size, input_ne, input_nb, 2);
 
             // first split
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
index 3bb166694..ebccc51de 100644
--- a/ggml/src/ggml-cann/ggml-cann.cpp
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
@@ -301,7 +301,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
  */
 struct ggml_cann_pool_vmm : public ggml_cann_pool {
     /**
-     * @brief The maximum size of the virtual memory pool.
+     * @brief The maximum size of the virtual memory pool (32 GB).
      */
     size_t max_size;
 
@@ -483,12 +483,14 @@ std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
  */
 struct ggml_backend_cann_buffer_context {
     int32_t device;  ///< The device ID associated with this buffer context.
-    void* dev_ptr = nullptr;
+    void* dev_ptr =
+        nullptr;  ///< Pointer to the device memory allocated for the buffer.
 
     /**
      * @brief Constructor to initialize the CANN buffer context.
      *
      * @param device The device ID associated with this buffer context.
+     * @param dev_ptr Pointer to the device memory allocated for the buffer.
      */
     ggml_backend_cann_buffer_context(int32_t device, void* dev_ptr)
         : device(device),
@@ -497,7 +499,7 @@ struct ggml_backend_cann_buffer_context {
     /**
      * @brief Destructor to free the device memory allocated for the buffer.
      */
-    ~ggml_backend_cann_buffer_context() { ACL_CHECK(aclrtFree(dev_ptr));}
+    ~ggml_backend_cann_buffer_context() { ACL_CHECK(aclrtFree(dev_ptr)); }
 };
 
 /**