restore some modifications

2024-11-26 07:32:39 +00:00 · 2024-11-26 07:32:39 +00:00 · e05a398fb3
commit e05a398fb3
parent cf6b987be3
2 changed files with 11 additions and 10 deletions
--- a/ggml/src/ggml-cann/aclnn_ops.cpp
+++ b/ggml/src/ggml-cann/aclnn_ops.cpp
@ -2427,6 +2427,7 @@ static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
                          aclTensor* acl_weight, aclTensor* acl_dst) {
    int8_t cube_math_type = 1;  // ALLOW_FP32_DOWN_PRECISION, when input is
                                // fp32, atlas a2 will transpose it to HFLOAT32.
    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;
@ -2617,8 +2618,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
    size_t weight_stride = src0->ne[1] * src0->ne[0] * weight_elem_size;
    size_t weight_size = weight_stride * src0->ne[2] * src0->ne[3];
-    // scale stored at the end of weight.
+    // scale stored at the end of weight. Also need transpose.
    // scale need transpose.
    size_t scale_elem_size = sizeof(uint16_t);
    size_t scale_nb[] = {src0->ne[0] / QK8_0 * scale_elem_size, scale_elem_size};
    size_t scale_stride = src0->ne[1] * src0->ne[0] / QK8_0 * scale_elem_size;
@ -2677,8 +2677,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
            int64_t batch0 = (n0 * src0->ne[2]) + c0;
            aclTensor* acl_input_tensor = ggml_cann_create_tensor(
-                (char*)input_buffer + batch1 * input_stride,
+                (char*)input_buffer + batch1 * input_stride, ACL_FLOAT16,
                ACL_FLOAT16,
                input_elem_size, input_ne, input_nb, 2);
            // first split
--- a/ggml/src/ggml-cann/ggml-cann.cpp
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
@ -301,7 +301,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
 */
 struct ggml_cann_pool_vmm : public ggml_cann_pool {
    /**
-     * @brief The maximum size of the virtual memory pool.
+     * @brief The maximum size of the virtual memory pool (32 GB).
     */
    size_t max_size;
@ -483,12 +483,14 @@ std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
 */
 struct ggml_backend_cann_buffer_context {
    int32_t device;  ///< The device ID associated with this buffer context.
-    void* dev_ptr = nullptr;
+    void* dev_ptr =
        nullptr;  ///< Pointer to the device memory allocated for the buffer.
    /**
     * @brief Constructor to initialize the CANN buffer context.
     *
     * @param device The device ID associated with this buffer context.
     * @param dev_ptr Pointer to the device memory allocated for the buffer.
     */
    ggml_backend_cann_buffer_context(int32_t device, void* dev_ptr)
        : device(device),