restore some modifications

This commit is contained in:
shanshan shen 2024-11-26 07:32:39 +00:00
parent cf6b987be3
commit e05a398fb3
2 changed files with 11 additions and 10 deletions

View file

@ -2427,6 +2427,7 @@ static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
aclTensor* acl_weight, aclTensor* acl_dst) { aclTensor* acl_weight, aclTensor* acl_dst) {
int8_t cube_math_type = 1; // ALLOW_FP32_DOWN_PRECISION, when input is int8_t cube_math_type = 1; // ALLOW_FP32_DOWN_PRECISION, when input is
// fp32, atlas a2 will transpose it to HFLOAT32. // fp32, atlas a2 will transpose it to HFLOAT32.
uint64_t workspaceSize = 0; uint64_t workspaceSize = 0;
aclOpExecutor* executor; aclOpExecutor* executor;
void* workspaceAddr = nullptr; void* workspaceAddr = nullptr;
@ -2617,8 +2618,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
size_t weight_stride = src0->ne[1] * src0->ne[0] * weight_elem_size; size_t weight_stride = src0->ne[1] * src0->ne[0] * weight_elem_size;
size_t weight_size = weight_stride * src0->ne[2] * src0->ne[3]; size_t weight_size = weight_stride * src0->ne[2] * src0->ne[3];
// scale stored at the end of weight. // scale stored at the end of weight. Also need transpose.
// scale need transpose.
size_t scale_elem_size = sizeof(uint16_t); size_t scale_elem_size = sizeof(uint16_t);
size_t scale_nb[] = {src0->ne[0] / QK8_0 * scale_elem_size, scale_elem_size}; size_t scale_nb[] = {src0->ne[0] / QK8_0 * scale_elem_size, scale_elem_size};
size_t scale_stride = src0->ne[1] * src0->ne[0] / QK8_0 * scale_elem_size; size_t scale_stride = src0->ne[1] * src0->ne[0] / QK8_0 * scale_elem_size;
@ -2677,8 +2677,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
int64_t batch0 = (n0 * src0->ne[2]) + c0; int64_t batch0 = (n0 * src0->ne[2]) + c0;
aclTensor* acl_input_tensor = ggml_cann_create_tensor( aclTensor* acl_input_tensor = ggml_cann_create_tensor(
(char*)input_buffer + batch1 * input_stride, (char*)input_buffer + batch1 * input_stride, ACL_FLOAT16,
ACL_FLOAT16,
input_elem_size, input_ne, input_nb, 2); input_elem_size, input_ne, input_nb, 2);
// first split // first split

View file

@ -301,7 +301,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
*/ */
struct ggml_cann_pool_vmm : public ggml_cann_pool { struct ggml_cann_pool_vmm : public ggml_cann_pool {
/** /**
* @brief The maximum size of the virtual memory pool. * @brief The maximum size of the virtual memory pool (32 GB).
*/ */
size_t max_size; size_t max_size;
@ -483,12 +483,14 @@ std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
*/ */
struct ggml_backend_cann_buffer_context { struct ggml_backend_cann_buffer_context {
int32_t device; ///< The device ID associated with this buffer context. int32_t device; ///< The device ID associated with this buffer context.
void* dev_ptr = nullptr; void* dev_ptr =
nullptr; ///< Pointer to the device memory allocated for the buffer.
/** /**
* @brief Constructor to initialize the CANN buffer context. * @brief Constructor to initialize the CANN buffer context.
* *
* @param device The device ID associated with this buffer context. * @param device The device ID associated with this buffer context.
* @param dev_ptr Pointer to the device memory allocated for the buffer.
*/ */
ggml_backend_cann_buffer_context(int32_t device, void* dev_ptr) ggml_backend_cann_buffer_context(int32_t device, void* dev_ptr)
: device(device), : device(device),