restore some modifications

This commit is contained in:
shanshan shen 2024-11-26 07:32:39 +00:00
parent cf6b987be3
commit e05a398fb3
2 changed files with 11 additions and 10 deletions

View file

@ -2427,6 +2427,7 @@ static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
aclTensor* acl_weight, aclTensor* acl_dst) {
int8_t cube_math_type = 1; // ALLOW_FP32_DOWN_PRECISION, when input is
// fp32, Atlas A2 will convert it to HFLOAT32.
uint64_t workspaceSize = 0;
aclOpExecutor* executor;
void* workspaceAddr = nullptr;
@ -2617,8 +2618,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
size_t weight_stride = src0->ne[1] * src0->ne[0] * weight_elem_size;
size_t weight_size = weight_stride * src0->ne[2] * src0->ne[3];
// scale stored at the end of weight.
// scale need transpose.
// The scale is stored at the end of the weight and also needs to be transposed.
size_t scale_elem_size = sizeof(uint16_t);
size_t scale_nb[] = {src0->ne[0] / QK8_0 * scale_elem_size, scale_elem_size};
size_t scale_stride = src0->ne[1] * src0->ne[0] / QK8_0 * scale_elem_size;
@ -2677,8 +2677,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
int64_t batch0 = (n0 * src0->ne[2]) + c0;
aclTensor* acl_input_tensor = ggml_cann_create_tensor(
(char*)input_buffer + batch1 * input_stride,
ACL_FLOAT16,
(char*)input_buffer + batch1 * input_stride, ACL_FLOAT16,
input_elem_size, input_ne, input_nb, 2);
// first split

View file

@ -301,7 +301,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
*/
struct ggml_cann_pool_vmm : public ggml_cann_pool {
/**
* @brief The maximum size of the virtual memory pool.
* @brief The maximum size of the virtual memory pool (32 GB).
*/
size_t max_size;
@ -483,12 +483,14 @@ std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
*/
struct ggml_backend_cann_buffer_context {
int32_t device; ///< The device ID associated with this buffer context.
void* dev_ptr = nullptr;
void* dev_ptr =
nullptr; ///< Pointer to the device memory allocated for the buffer.
/**
* @brief Constructor to initialize the CANN buffer context.
*
* @param device The device ID associated with this buffer context.
* @param dev_ptr Pointer to the device memory allocated for the buffer.
*/
ggml_backend_cann_buffer_context(int32_t device, void* dev_ptr)
: device(device),