add ggml_cann prefix for acl funcs
parent 96e09b979d
commit 57197b74b0
4 changed files with 302 additions and 284 deletions
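All ACL helper functions in the CANN backend gain a ggml_cann_ prefix, and their call sites are re-wrapped to fit the longer names. The renames, as they appear in the hunks below:

    type_mapping            -> ggml_cann_type_mapping
    create_acl_tensor       -> ggml_cann_create_tensor      (both overloads)
    need_bcast              -> ggml_cann_need_bcast
    get_bcast_shape         -> ggml_cann_get_bcast_shape
    get_mul_mat_bcast_shape -> ggml_cann_get_mulmat_bcast_shape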
@@ -25,7 +25,7 @@
 #include <algorithm>
 #include <cstring>

-aclDataType type_mapping(ggml_type type) {
+aclDataType ggml_cann_type_mapping(ggml_type type) {
     switch (type) {
         case GGML_TYPE_F32:
             return ACL_FLOAT;
@@ -43,8 +43,9 @@ aclDataType type_mapping(ggml_type type) {
     return ACL_DT_UNDEFINED;
 }

-aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne, size_t* nb,
-                             int64_t dims, aclFormat format, size_t offset) {
+aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
+                                   size_t* nb, int64_t dims, aclFormat format,
+                                   size_t offset) {
     // If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
     // added.
     int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
@@ -71,15 +72,15 @@ aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne, size_t* nb,
     std::reverse(acl_ne, acl_ne + final_dims);
     std::reverse(acl_stride, acl_stride + final_dims);

-    aclTensor* acl_tensor =
-        aclCreateTensor(acl_ne, final_dims, type_mapping(tensor->type),
-                        acl_stride, offset / ggml_element_size(tensor), format,
-                        &acl_storage_len, 1, tensor->data);
+    aclTensor* acl_tensor = aclCreateTensor(
+        acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
+        offset / ggml_element_size(tensor), format, &acl_storage_len, 1,
+        tensor->data);

     return acl_tensor;
 }

-bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
+bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
         if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
             return true;
@@ -88,9 +89,10 @@ bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
     return false;
 }

-aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
-                             size_t type_size, int64_t* ne, size_t* nb,
-                             int64_t dims, aclFormat format, size_t offset) {
+aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
+                                   size_t type_size, int64_t* ne, size_t* nb,
+                                   int64_t dims, aclFormat format,
+                                   size_t offset) {
     int64_t tmp_ne[GGML_MAX_DIMS * 2];
     int64_t tmp_stride[GGML_MAX_DIMS * 2];

@@ -114,9 +116,11 @@ aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
     return acl_tensor;
 }

-int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
-                        int64_t* bcast_src0_ne, int64_t* bcast_src1_ne,
-                        size_t* bcast_src0_nb, size_t* bcast_src1_nb) {
+int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
+                                  const ggml_tensor* src1,
+                                  int64_t* bcast_src0_ne,
+                                  int64_t* bcast_src1_ne, size_t* bcast_src0_nb,
+                                  size_t* bcast_src1_nb) {
     GGML_ASSERT(ggml_can_repeat(src1, src0));
     int bcast_dim_cnt = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
@@ -140,13 +144,11 @@ int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
     return bcast_dim_cnt;
 }

-int64_t get_mul_mat_bcast_shape(const int64_t* input_ne,
-                                const int64_t* weight_ne, const int64_t* dst_ne,
-                                const size_t* input_nb, const size_t* weight_nb,
-                                const size_t* dst_nb, int64_t* bcast_input_ne,
-                                int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
-                                size_t* bcast_input_nb, size_t* bcast_weight_nb,
-                                size_t* bcast_dst_nb) {
+int64_t ggml_cann_get_mulmat_bcast_shape(
+    const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
+    const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
+    int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
+    size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb) {
     // input and dst shoule in same shape, except first two dims.
     GGML_ASSERT(input_ne[2] == dst_ne[2]);
     GGML_ASSERT(input_ne[3] == dst_ne[3]);

@@ -38,7 +38,7 @@
  * @return The corresponding aclDataType. If the input type is not recognized,
  * ACL_DT_UNDEFINED is returned.
  */
-aclDataType type_mapping(ggml_type type);
+aclDataType ggml_cann_type_mapping(ggml_type type);

 /**
  * @brief Creates an ACL tensor from a ggml_tensor with optional shape.
@@ -59,7 +59,7 @@ aclDataType type_mapping(ggml_type type);
  * @param offset Offset in bytes for the ACL tensor data. Defaults to 0.
  * @return Pointer to the created ACL tensor.
  */
-aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr,
+aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr,
                              size_t* nb = nullptr, int64_t dims = 0,
                              aclFormat format = ACL_FORMAT_ND,
                              size_t offset = 0);
@@ -83,7 +83,7 @@ aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr,
  * @param offset Offset in bytes for the ACL tensor data. Defaults to 0.
  * @return Pointer to the created ACL tensor.
  */
-aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
+aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
                              size_t type_size, int64_t* ne, size_t* nb,
                              int64_t dims, aclFormat format = ACL_FORMAT_ND,
                              size_t offset = 0);
@@ -104,7 +104,7 @@ aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
  * to 1. If such a dimension is found, broadcasting is required to align t1
  * with t0 for element-wise operations.
  */
-bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1);
+bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1);

 /**
  * @brief Computes broadcast shapes and strides for two ggml_tensors.
@@ -159,19 +159,19 @@ bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1);
  * dim1 in a inserted dim, should add nb for dim1,
  * and all other nb moves to next in order.
  */
-int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
+int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
                         int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
                         size_t* bcast_nb_src0, size_t* bcast_nb_src1);

 // Bcast macro to avoid duplicate code.
-#define BCAST_SHAPE(src0, src1) \
-    int64_t bcast_##src0##_ne[GGML_MAX_DIMS * 2]; \
-    int64_t bcast_##src1##_ne[GGML_MAX_DIMS * 2]; \
-    size_t bcast_##src0##_nb[GGML_MAX_DIMS * 2]; \
-    size_t bcast_##src1##_nb[GGML_MAX_DIMS * 2]; \
-    int64_t bcast_dims = \
-        get_bcast_shape(src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, \
-                        bcast_##src0##_nb, bcast_##src1##_nb);
+#define BCAST_SHAPE(src0, src1) \
+    int64_t bcast_##src0##_ne[GGML_MAX_DIMS * 2]; \
+    int64_t bcast_##src1##_ne[GGML_MAX_DIMS * 2]; \
+    size_t bcast_##src0##_nb[GGML_MAX_DIMS * 2]; \
+    size_t bcast_##src1##_nb[GGML_MAX_DIMS * 2]; \
+    int64_t bcast_dims = ggml_cann_get_bcast_shape( \
+        src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, bcast_##src0##_nb, \
+        bcast_##src1##_nb);

 #define BCAST_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims

@@ -201,17 +201,15 @@ int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
  * shapes needed for matrix multiplication. It ensures that dimensions where
  * weight tensor requires expansion are appropriately handled to conform with
  * broadcasting rules.
- * @note compare with get_bcast_shape,mul_mat broadcast need add this new dim before
- * cast dim.
- * @sa get_bcast_shape
+ * @note compare with ggml_cann_get_bcast_shape,mul_mat broadcast need add this new dim
+ * before cast dim.
+ * @sa ggml_cann_get_bcast_shape
  */
-int64_t get_mul_mat_bcast_shape(const int64_t* input_ne,
-                                const int64_t* weight_ne, const int64_t* dst_ne,
-                                const size_t* input_nb, const size_t* weight_nb,
-                                const size_t* dst_nb, int64_t* bcast_input_ne,
-                                int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
-                                size_t* bcast_input_nb, size_t* bcast_weight_nb,
-                                size_t* bcast_dst_nb);
+int64_t ggml_cann_get_mulmat_bcast_shape(
+    const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
+    const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
+    int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
+    size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb);

 // Bcast macro to avoid duplicate code.
 #define BCAST_MUL_MAT_SHAPE(input, weight, dst) \
@@ -221,7 +219,7 @@ int64_t get_mul_mat_bcast_shape(const int64_t* input_ne,
     size_t bcast_##input##_nb[GGML_MAX_DIMS * 2]; \
     size_t bcast_##weight##_nb[GGML_MAX_DIMS * 2]; \
     size_t bcast_##dst##_nb[GGML_MAX_DIMS * 2]; \
-    int64_t bcast_dims = get_mul_mat_bcast_shape( \
+    int64_t bcast_dims = ggml_cann_get_mulmat_bcast_shape( \
         input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, \
         bcast_##input##_ne, bcast_##weight##_ne, bcast_##dst##_ne, \
         bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb);
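For orientation, here is a minimal usage sketch of the renamed helpers together with the BCAST_SHAPE / BCAST_PARAM macros, mirroring the ggml_cann_add call site further down in this diff (illustrative only; src0, src1 and dst are the usual ggml_tensor operands):

    aclTensor* acl_src0;
    aclTensor* acl_src1;
    aclTensor* acl_dst;
    if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
        // BCAST_SHAPE declares bcast_src0_ne/_nb, bcast_src1_ne/_nb and
        // bcast_dims, filled in by ggml_cann_get_bcast_shape().
        BCAST_SHAPE(src0, src1)
        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
        acl_dst  = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
    } else {
        // Defaults (ne = nullptr, nb = nullptr, dims = 0) take shape and
        // strides directly from the ggml_tensor.
        acl_src0 = ggml_cann_create_tensor(src0);
        acl_src1 = ggml_cann_create_tensor(src1);
        acl_dst  = ggml_cann_create_tensor(dst);
    }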
@@ -98,8 +98,8 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
     GGML_ASSERT(ggml_can_repeat(src, dst));

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     int64_t repeatsArray[] = {dst->ne[3] / src->ne[3], dst->ne[2] / src->ne[2],
                               dst->ne[1] / src->ne[1], dst->ne[0] / src->ne[0]};
@@ -156,15 +156,15 @@ void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     aclTensor* acl_dst;

     // Need bcast
-    if (!ggml_are_same_shape(src0, src1) && need_bcast(src0, src1)) {
+    if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
         BCAST_SHAPE(src0, src1)
-        acl_src0 = create_acl_tensor(src0, BCAST_PARAM(src0));
-        acl_src1 = create_acl_tensor(src1, BCAST_PARAM(src1));
-        acl_dst = create_acl_tensor(dst, BCAST_PARAM(src0));
+        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
+        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
+        acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
     } else {
-        acl_src0 = create_acl_tensor(src0);
-        acl_src1 = create_acl_tensor(src1);
-        acl_dst = create_acl_tensor(dst);
+        acl_src0 = ggml_cann_create_tensor(src0);
+        acl_src1 = ggml_cann_create_tensor(src1);
+        acl_dst = ggml_cann_create_tensor(dst);
     }

     aclnn_add(ctx, acl_src0, acl_src1, acl_dst);
@@ -180,8 +180,8 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     GGML_ASSERT(src->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     float negative_slope;
     memcpy(&negative_slope, dst->op_params, sizeof(float));
@@ -237,9 +237,9 @@ static void aclnn_concat(ggml_backend_cann_context& ctx,
 void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src0 = dst->src[0];
     ggml_tensor* src1 = dst->src[1];
-    aclTensor* acl_src0 = create_acl_tensor(src0);
-    aclTensor* acl_src1 = create_acl_tensor(src1);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
+    aclTensor* acl_src1 = ggml_cann_create_tensor(src1);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     int64_t concat_dim = 1;
     aclTensor* tensors[] = {acl_src0, acl_src1};
@@ -299,7 +299,7 @@ static void aclnn_arange(ggml_backend_cann_context& ctx, aclTensor* acl_dst,
 void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     int64_t n_elements = ggml_nelements(dst);
     float start;
@@ -328,8 +328,8 @@ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     memcpy(&min, dst->op_params, sizeof(float));
     memcpy(&max, (float*)dst->op_params + 1, sizeof(float));

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     aclScalar* acl_min = aclCreateScalar(&min, aclDataType::ACL_FLOAT);
     aclScalar* acl_max = aclCreateScalar(&max, aclDataType::ACL_FLOAT);
@@ -361,8 +361,8 @@ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     memcpy(&v, dst->op_params, sizeof(float));

     aclScalar* scale = aclCreateScalar(&v, aclDataType::ACL_FLOAT);
-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
@@ -386,14 +386,14 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
     enum ggml_sort_order order = (enum ggml_sort_order)dst->op_params[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
     ggml_cann_pool_alloc temp_buffer_allocator(
         ctx.pool(), ggml_nelements(dst) * sizeof(int64_t));
     void* buffer = temp_buffer_allocator.get();
     aclTensor* tmp_tensor =
-        create_acl_tensor(buffer, ACL_INT64, ggml_type_size(dst->type), dst->ne,
-                          dst->nb, GGML_MAX_DIMS);
+        ggml_cann_create_tensor(buffer, ACL_INT64, ggml_type_size(dst->type),
+                                dst->ne, dst->nb, GGML_MAX_DIMS);

     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
@@ -411,7 +411,8 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         aclnnArgsort(workspaceAddr, workspaceSize, executor, ctx.stream()));

     workspaceSize = 0;
-    ACL_CHECK(aclnnCastGetWorkspaceSize(tmp_tensor, type_mapping(dst->type),
+    ACL_CHECK(aclnnCastGetWorkspaceSize(tmp_tensor,
+                                        ggml_cann_type_mapping(dst->type),
                                         acl_dst, &workspaceSize, &executor));
     if (workspaceSize > 0) {
         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
@@ -428,8 +429,8 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     float eps;
     memcpy(&eps, dst->op_params, sizeof(float));
@@ -460,8 +461,8 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     const float eps = 1e-6f; // TODO: make this a parameter
     int n_groups = dst->op_params[0];
@@ -481,9 +482,9 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {

     ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), n_bytes * 2);
     void* buffer = temp_buffer_allocator.get();
-    aclTensor* acl_mean_out =
-        create_acl_tensor(buffer, ACL_FLOAT, type_size, ne, nb, ACL_FORMAT_ND);
-    aclTensor* acl_rstd_out = create_acl_tensor(
+    aclTensor* acl_mean_out = ggml_cann_create_tensor(
+        buffer, ACL_FLOAT, type_size, ne, nb, ACL_FORMAT_ND);
+    aclTensor* acl_rstd_out = ggml_cann_create_tensor(
         (char*)buffer + n_bytes, ACL_FLOAT, type_size, ne, nb, ACL_FORMAT_ND);

     ACL_CHECK(aclnnGroupNormGetWorkspaceSize(
@@ -516,9 +517,9 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {

     size_t param_nb[] = {ggml_element_size(src0), nb1, nb2, nb3};

-    aclTensor* acl_dst = create_acl_tensor(
+    aclTensor* acl_dst = ggml_cann_create_tensor(
         dst, src1->ne, param_nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
-    aclTensor* acl_src1 = create_acl_tensor(src1);
+    aclTensor* acl_src1 = ggml_cann_create_tensor(src1);

     aclScalar* alpha = nullptr;
     float alphaValue = 1.0f;
@@ -532,7 +533,7 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         size_t cpy_size = ggml_nbytes(dst);
         ACL_CHECK(aclrtMemcpyAsync(dst->data, cpy_size, src0->data, cpy_size,
                                    ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream()));
-        aclTensor* acl_src0 = create_acl_tensor(
+        aclTensor* acl_src0 = ggml_cann_create_tensor(
             src0, src1->ne, src0->nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
         ACL_CHECK(aclnnAddGetWorkspaceSize(acl_src0, acl_src1, alpha, acl_dst,
                                            &workspaceSize, &executor));
@@ -561,10 +562,10 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);

     GGML_ASSERT(dst->ne[0] == 1);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     int64_t reduce_dims_host[] = {3};
     aclIntArray* reduce_dims = aclCreateIntArray(reduce_dims_host, 1);
@@ -573,9 +574,9 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     aclOpExecutor* executor;
     void* workspaceAddr = nullptr;

-    ACL_CHECK(aclnnReduceSumGetWorkspaceSize(acl_src, reduce_dims, true,
-                                             type_mapping(src->type), acl_dst,
-                                             &workspaceSize, &executor));
+    ACL_CHECK(aclnnReduceSumGetWorkspaceSize(
+        acl_src, reduce_dims, true, ggml_cann_type_mapping(src->type), acl_dst,
+        &workspaceSize, &executor));
     if (workspaceSize > 0) {
         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
         workspaceAddr = workspace_allocator.get();
@@ -592,9 +593,9 @@ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
                                   ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
     aclTensor* acl_src =
-        create_acl_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
     aclTensor* acl_dst =
-        create_acl_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);

     std::vector<int64_t> output_size{dst->ne[1], dst->ne[0]};
     auto output_size_array = aclCreateIntArray(output_size.data(), 2);
@@ -659,8 +660,8 @@ static void aclnn_pad(ggml_backend_cann_context& ctx, aclTensor* acl_src,

 void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     // padding: value in the array means how much distance will be padding.
     // the position of elements in the array means which dirction to padding,
@@ -694,9 +695,9 @@ static void ggml_cann_avg_pool2d(ggml_backend_cann_context& ctx,
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     aclTensor* acl_src =
-        create_acl_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
     aclTensor* acl_dst =
-        create_acl_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);

     const int32_t* opts = (const int32_t*)dst->op_params;
     const int k0 = opts[1];
@@ -732,7 +733,8 @@ static void ggml_cann_avg_pool2d(ggml_backend_cann_context& ctx,
         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
         workspaceAddr = workspace_allocator.get();
     }
-    ACL_CHECK(aclnnAvgPool2d(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    ACL_CHECK(
+        aclnnAvgPool2d(workspaceAddr, workspaceSize, executor, ctx.stream()));

     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
@@ -760,9 +762,9 @@ static void ggml_cann_max_pool2d(ggml_backend_cann_context& ctx,
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     aclTensor* acl_src =
-        create_acl_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
     aclTensor* acl_dst =
-        create_acl_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);

     const int32_t* opts = (const int32_t*)dst->op_params;
     const int k0 = opts[1];
@@ -784,9 +786,9 @@ static void ggml_cann_max_pool2d(ggml_backend_cann_context& ctx,
     ggml_cann_pool_alloc temp_buffer_allocator(
         ctx.pool(), ggml_nbytes(src) + p0 * 2 + p1 * 2 * src->nb[1]);
     void* buffer = temp_buffer_allocator.get();
-    aclTensor* tmp_tensor =
-        create_acl_tensor(buffer, ACL_FLOAT, ggml_element_size(src), temp_ne,
-                          temp_nb, GGML_MAX_DIMS, ACL_FORMAT_NCHW);
+    aclTensor* tmp_tensor = ggml_cann_create_tensor(
+        buffer, ACL_FLOAT, ggml_element_size(src), temp_ne, temp_nb,
+        GGML_MAX_DIMS, ACL_FORMAT_NCHW);

     // pad: see padding in ggml_cann_pad()
     int64_t paddings[] = {p0, p0, p1, p1, 0, 0, 0, 0};
@@ -819,7 +821,8 @@ static void ggml_cann_max_pool2d(ggml_backend_cann_context& ctx,
         workspaceAddr = workspace_allocator.get();
     }

-    ACL_CHECK(aclnnMaxPool(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    ACL_CHECK(
+        aclnnMaxPool(workspaceAddr, workspaceSize, executor, ctx.stream()));

     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
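Several call sites in this file follow the same pattern for temporaries: allocate scratch memory from the backend context's pool, then wrap it with the raw-pointer overload of ggml_cann_create_tensor. A minimal sketch of that pattern, taken from the ggml_cann_argsort hunk above (illustrative only):

    // Scratch buffer from the backend pool, viewed as an ACL tensor.
    ggml_cann_pool_alloc temp_buffer_allocator(
        ctx.pool(), ggml_nelements(dst) * sizeof(int64_t));
    void* buffer = temp_buffer_allocator.get();
    aclTensor* tmp_tensor =
        ggml_cann_create_tensor(buffer, ACL_INT64, ggml_type_size(dst->type),
                                dst->ne, dst->nb, GGML_MAX_DIMS);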
@@ -870,14 +873,15 @@ static void cann_copy(ggml_backend_cann_context& ctx, aclTensor* acl_src,
         workspaceAddr = workspace_allocator.get();
     }

-    ACL_CHECK(aclnnInplaceCopy(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    ACL_CHECK(
+        aclnnInplaceCopy(workspaceAddr, workspaceSize, executor, ctx.stream()));
 }

 void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     ggml_cann_pool_alloc src_extra_allocator(ctx.pool(), sizeof(ggml_tensor));
     ggml_cann_pool_alloc dst_extra_allocator(ctx.pool(), sizeof(ggml_tensor));
@@ -891,7 +895,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                                ctx.stream()));

     if ((dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32) &&
-            ggml_are_same_shape(src, dst)) {
+        ggml_are_same_shape(src, dst)) {
         cann_copy(ctx, acl_src, acl_dst);
         ACL_CHECK(aclDestroyTensor(acl_src));
         ACL_CHECK(aclDestroyTensor(acl_dst));
@@ -1070,7 +1074,8 @@ static aclTensor* aclnn_zero(ggml_backend_cann_context& ctx, void* buffer,
     }

     ACL_CHECK(aclrtMemsetAsync(buffer, n_bytes, 0, n_bytes, ctx.stream()));
-    aclTensor* zero = create_acl_tensor(buffer, type, type_size, ne, nb, dims);
+    aclTensor* zero =
+        ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims);
     return zero;
 }

@@ -1122,8 +1127,8 @@ static aclTensor* aclnn_ones(ggml_backend_cann_context& ctx, void* buffer,
 void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     float eps;
     memcpy(&eps, dst->op_params, sizeof(float));
@@ -1137,16 +1142,17 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     size_t one_tensor_n_bytes = src->ne[0] * ggml_element_size(src);
     ggml_cann_pool_alloc one_tensor_allocator(ctx.pool(), one_tensor_n_bytes);

-    aclTensor* acl_gamma =
-        aclnn_ones(ctx, one_tensor_allocator.get(), one_tensor_n_bytes, src->ne,
-                   1, type_mapping(src->type), ggml_element_size(src));
+    aclTensor* acl_gamma = aclnn_ones(
+        ctx, one_tensor_allocator.get(), one_tensor_n_bytes, src->ne, 1,
+        ggml_cann_type_mapping(src->type), ggml_element_size(src));

     size_t zero_tensor_n_bytes =
         src->ne[1] * src->ne[2] * src->ne[3] * ggml_element_size(src);
     ggml_cann_pool_alloc zero_tensor_allocator(ctx.pool(), zero_tensor_n_bytes);
-    aclTensor* acl_rstd = aclnn_zero(
-        ctx, zero_tensor_allocator.get(), zero_tensor_n_bytes, src->ne,
-        GGML_MAX_DIMS, type_mapping(src->type), ggml_element_size(src));
+    aclTensor* acl_rstd =
+        aclnn_zero(ctx, zero_tensor_allocator.get(), zero_tensor_n_bytes,
+                   src->ne, GGML_MAX_DIMS, ggml_cann_type_mapping(src->type),
+                   ggml_element_size(src));

     ACL_CHECK(aclnnRmsNormGetWorkspaceSize(
         acl_src, acl_gamma, eps, acl_dst, acl_rstd, &workspaceSize, &executor));
@@ -1170,8 +1176,8 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst,
                          float value) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     const int n_past = ((int32_t*)dst->op_params)[0];

@@ -1179,9 +1185,10 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst,
                                src->ne[3] * ggml_element_size(src);
     ggml_cann_pool_alloc one_tensor_allocator(ctx.pool(), one_tensor_n_bytes);

-    aclTensor* mask_tensor = aclnn_ones(
-        ctx, one_tensor_allocator.get(), one_tensor_n_bytes, src->ne,
-        GGML_MAX_DIMS, type_mapping(src->type), ggml_element_size(src), value);
+    aclTensor* mask_tensor =
+        aclnn_ones(ctx, one_tensor_allocator.get(), one_tensor_n_bytes, src->ne,
+                   GGML_MAX_DIMS, ggml_cann_type_mapping(src->type),
+                   ggml_element_size(src), value);

     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
@@ -1336,7 +1343,7 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     GGML_ASSERT(nb10 == sizeof(float));

     // im2col: [N,C,H,W] -> [N, IC * KH * KW, OW * OH]
-    aclTensor* acl_src1 = create_acl_tensor(src1);
+    aclTensor* acl_src1 = ggml_cann_create_tensor(src1);
     int64_t tmp_im2col_ne[] = {OW * OH, IC * KH * KW, N};
     size_t tmp_im2col_nb[GGML_MAX_DIMS - 1];

@@ -1351,9 +1358,10 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_cann_pool_alloc im2col_allocator(
         ctx.pool(), ggml_nelements(dst) * ggml_element_size(src1));
     void* tmp_im2col_buffer = im2col_allocator.get();
-    aclTensor* tmp_im2col_tensor = create_acl_tensor(
-        tmp_im2col_buffer, type_mapping(src1->type), ggml_type_size(src1->type),
-        tmp_im2col_ne, tmp_im2col_nb, GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
+    aclTensor* tmp_im2col_tensor = ggml_cann_create_tensor(
+        tmp_im2col_buffer, ggml_cann_type_mapping(src1->type),
+        ggml_type_size(src1->type), tmp_im2col_ne, tmp_im2col_nb,
+        GGML_MAX_DIMS - 1, ACL_FORMAT_ND);

     std::vector<int64_t> kernel_dims = {KH, KW};
     std::vector<int64_t> dilation_size = {d1, d0};
@@ -1377,7 +1385,8 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         workspaceAddr = workspace_allocator.get();
     }

-    ACL_CHECK(aclnnIm2col(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    ACL_CHECK(
+        aclnnIm2col(workspaceAddr, workspaceSize, executor, ctx.stream()));

     // Cast if dst is f16.
     aclTensor* tmp_cast_tensor = nullptr;
@@ -1391,18 +1400,19 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
             temp_cast_nb[i] = temp_cast_nb[i - 1] * tmp_im2col_ne[i - 1];
         }

-        tmp_cast_tensor = create_acl_tensor(
-            tmp_cast_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-            tmp_im2col_ne, temp_cast_nb, GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
+        tmp_cast_tensor = ggml_cann_create_tensor(
+            tmp_cast_buffer, ggml_cann_type_mapping(dst->type),
+            ggml_type_size(dst->type), tmp_im2col_ne, temp_cast_nb,
+            GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
         aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor,
-                   type_mapping(dst->type));
+                   ggml_cann_type_mapping(dst->type));
     }

     // Permute: [N, IC * KH * KW, OW * OH] -> [N, OW * OH, IC * KH * KW]
     int64_t dst_ne[] = {dst->ne[0], dst->ne[1] * dst->ne[2], dst->ne[3]};
     size_t dst_nb[] = {dst->nb[0], dst->nb[1], dst->nb[3]};
     aclTensor* acl_dst =
-        create_acl_tensor(dst, dst_ne, dst_nb, GGML_MAX_DIMS - 1);
+        ggml_cann_create_tensor(dst, dst_ne, dst_nb, GGML_MAX_DIMS - 1);

     int64_t permute_dim[] = {0, 2, 1};
     if (src1->type != dst->type) {
@@ -1517,7 +1527,8 @@ static void aclnn_muls(ggml_backend_cann_context& ctx, aclTensor* acl_src,
  * \f]
  *
  * @param ctx The context for the CANN backend operations.
- * @param acl_src The source tensor where the multiplication result will be stored.
+ * @param acl_src The source tensor where the multiplication result will be
+ * stored.
  * @param acl_other The tensor whose elements will be multiplied with `acl_src`.
  */
 static void aclnn_inplace_mul(ggml_backend_cann_context& ctx,
@@ -1553,9 +1564,8 @@ static void aclnn_inplace_mul(ggml_backend_cann_context& ctx,
  * @param acl_other The second tensor for element-wise multiplication.
  * @param acl_dst The destination tensor where the result will be stored.
  */
-static void aclnn_mul(ggml_backend_cann_context& ctx,
-                      aclTensor* acl_src, aclTensor* acl_other,
-                      aclTensor* acl_dst) {
+static void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
+                      aclTensor* acl_other, aclTensor* acl_dst) {
     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
     void* workspaceAddr = nullptr;
@@ -1573,16 +1583,16 @@ static void aclnn_mul(ggml_backend_cann_context& ctx,
 /**
  * @brief Applies element-wise cosine function to the elements of a tensor.
  *
- * This function computes the cosine of each element in the source tensor `acl_src`
- * and stores the result in the destination tensor `acl_dst`.
- * The operation is defined as:
- * \f[
- * \text {acl_dst }_i=\cos \left(\text {acl_src }_i\right)
- * \f]
+ * This function computes the cosine of each element in the source tensor
+ * `acl_src` and stores the result in the destination tensor `acl_dst`. The
+ * operation is defined as: \f[ \text {acl_dst }_i=\cos \left(\text {acl_src
+ * }_i\right) \f]
  *
  * @param ctx The context for the CANN backend operations.
- * @param acl_src The source tensor on which the cosine function will be applied.
- * @param acl_dst The destination tensor where the cosine results will be stored.
+ * @param acl_src The source tensor on which the cosine function will be
+ * applied.
+ * @param acl_dst The destination tensor where the cosine results will be
+ * stored.
  */
 static void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
                       aclTensor* acl_dst) {
@@ -1603,7 +1613,8 @@ static void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 /**
  * @brief Applies element-wise sine function to the elements of a tensor.
  *
- * This function computes the sine of each element in the source tensor `acl_src`
+ * This function computes the sine of each element in the source tensor
+ `acl_src`
  * and stores the result in the destination tensor `acl_dst`.
  * The operation is defined as:
  * \f[
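The re-wrapped ACL_CHECK(aclnnXxx(...)) lines in this file all follow CANN's two-phase aclnn execution pattern: query the workspace size, allocate it from the pool if it is non-zero, then launch the operator on the backend stream. A sketch of that pattern for the cosine operator used by aclnn_cos (aclnnCosGetWorkspaceSize/aclnnCos are the standard CANN entry points; treat the exact names and signatures as an assumption for any other op):

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;

    // Phase 1: ask the op how much scratch memory it needs.
    ACL_CHECK(aclnnCosGetWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
    if (workspaceSize > 0) {
        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
        workspaceAddr = workspace_allocator.get();
    }
    // Phase 2: launch the operator asynchronously on the backend stream.
    ACL_CHECK(aclnnCos(workspaceAddr, workspaceSize, executor, ctx.stream()));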
@@ -1641,7 +1652,7 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
     const int max_period = dst->op_params[1];
     int half = dim / 2;

-    aclTensor* acl_src = create_acl_tensor(src);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);

     // arange: [0, ..., half)
     float start = 0;
@@ -1653,9 +1664,10 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,

     ggml_cann_pool_alloc arange_allocator(ctx.pool(), half * sizeof(dst->type));
     void* tmp_arange_buffer = arange_allocator.get();
-    aclTensor* tmp_arange_tensor = create_acl_tensor(
-        tmp_arange_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-        tmp_arange_ne, tmp_arange_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
+    aclTensor* tmp_arange_tensor = ggml_cann_create_tensor(
+        tmp_arange_buffer, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), tmp_arange_ne, tmp_arange_nb,
+        GGML_MAX_DIMS - 3, ACL_FORMAT_ND);

     aclnn_arange(ctx, tmp_arange_tensor, start, stop, step, n_elements_arange);

@@ -1675,9 +1687,10 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,

     ggml_cann_pool_alloc permute_allocator(ctx.pool(), ggml_nbytes(src));
     void* tmp_permute_buffer = permute_allocator.get();
-    aclTensor* tmp_permute_tenosr = create_acl_tensor(
-        tmp_permute_buffer, type_mapping(src->type), ggml_type_size(src->type),
-        tmp_permute_ne, tmp_permute_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
+    aclTensor* tmp_permute_tenosr = ggml_cann_create_tensor(
+        tmp_permute_buffer, ggml_cann_type_mapping(src->type),
+        ggml_type_size(src->type), tmp_permute_ne, tmp_permute_nb,
+        GGML_MAX_DIMS, ACL_FORMAT_ND);
     int64_t permute_dim[] = {0, 1, 3, 2};
     int64_t num_dims = 4;
     aclnn_permute(ctx, acl_src, tmp_permute_tenosr, permute_dim, num_dims);
@@ -1697,19 +1710,20 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
     ggml_cann_pool_alloc mul_allocator(
         ctx.pool(), mul_nelements * ggml_type_size(src->type));
     void* tmp_mul_buffer = mul_allocator.get();
-    aclTensor* tmp_mul_tensor = create_acl_tensor(
-        tmp_mul_buffer, type_mapping(src->type), ggml_type_size(src->type),
-        tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_mul(ctx, tmp_permute_tenosr, tmp_arange_tensor,
-              tmp_mul_tensor);
+    aclTensor* tmp_mul_tensor = ggml_cann_create_tensor(
+        tmp_mul_buffer, ggml_cann_type_mapping(src->type),
+        ggml_type_size(src->type), tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS,
+        ACL_FORMAT_ND);
+    aclnn_mul(ctx, tmp_permute_tenosr, tmp_arange_tensor, tmp_mul_tensor);

     // cos
     ggml_cann_pool_alloc cos_allocator(
         ctx.pool(), mul_nelements * ggml_type_size(src->type));
     void* tmp_cos_buffer = cos_allocator.get();
-    aclTensor* tmp_cos_tensor = create_acl_tensor(
-        tmp_cos_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-        tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
+    aclTensor* tmp_cos_tensor = ggml_cann_create_tensor(
+        tmp_cos_buffer, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS,
+        ACL_FORMAT_ND);

     aclnn_cos(ctx, tmp_mul_tensor, tmp_cos_tensor);

@@ -1717,15 +1731,16 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
     ggml_cann_pool_alloc sin_allocator(
         ctx.pool(), mul_nelements * ggml_type_size(src->type));
     void* tmp_sin_buffer = sin_allocator.get();
-    aclTensor* tmp_sin_tensor = create_acl_tensor(
-        tmp_sin_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-        tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
+    aclTensor* tmp_sin_tensor = ggml_cann_create_tensor(
+        tmp_sin_buffer, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS,
+        ACL_FORMAT_ND);

     aclnn_sin(ctx, tmp_mul_tensor, tmp_sin_tensor);

     // concat
     int64_t concat_dim = 3;
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
     aclTensor* tensors[] = {tmp_cos_tensor, tmp_sin_tensor};
     aclTensorList* tensorList = aclCreateTensorList(tensors, 2);
     aclnn_concat(ctx, tensorList, acl_dst, concat_dim);
@@ -1816,7 +1831,8 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
  * @param acl_dst The destination tensor where the result will be stored.
  * @param n_head The number of attention heads.
  * @param src_ne The dimensions of the source tensor.
- * @param src_nb0 The byte size of the first dimension of the source tensor.
+ * @param src_nb0 The byte size of the first dimension of the source
+ tensor.
  * @param max_bias The maximum bias value used in the Alibi mechanism.
  * @param dst The destination tensor object for additional metadata.
  *
@@ -1858,9 +1874,10 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,

     int64_t tmp_arange1_ne[] = {n_heads_log2_floor};
     size_t tmp_arange1_nb[] = {sizeof(dst->type)};
-    aclTensor* tmp_arange1_tensor = create_acl_tensor(
-        tmp_arange_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-        tmp_arange1_ne, tmp_arange1_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
+    aclTensor* tmp_arange1_tensor = ggml_cann_create_tensor(
+        tmp_arange_buffer, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), tmp_arange1_ne, tmp_arange1_nb,
+        GGML_MAX_DIMS - 3, ACL_FORMAT_ND);

     aclnn_arange(ctx, tmp_arange1_tensor, start, stop, step, n_elements_arange);

@@ -1874,11 +1891,11 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
         int64_t tmp_arange2_ne[] = {ne2_ne3 - n_heads_log2_floor};
         size_t tmp_arange2_nb[] = {sizeof(dst->type)};

-        aclTensor* tmp_arange2_tensor = create_acl_tensor(
+        aclTensor* tmp_arange2_tensor = ggml_cann_create_tensor(
             (char*)tmp_arange_buffer +
                 n_heads_log2_floor * ggml_type_size(dst->type),
-            type_mapping(dst->type), ggml_type_size(dst->type), tmp_arange2_ne,
-            tmp_arange2_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
+            ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type),
+            tmp_arange2_ne, tmp_arange2_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
         aclnn_arange(ctx, tmp_arange2_tensor, start, stop, step,
                      n_elements_arange);
     }
@@ -1889,9 +1906,10 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
     void* tmp_mk_base_buffer = mk_base_allocator.get();
     int64_t tmp_mk_base1_ne[] = {n_heads_log2_floor};
     size_t tmp_mk_base1_nb[] = {sizeof(dst->type)};
-    aclTensor* tmp_mk_base1_tensor = create_acl_tensor(
-        tmp_mk_base_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-        tmp_mk_base1_ne, tmp_mk_base1_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
+    aclTensor* tmp_mk_base1_tensor = ggml_cann_create_tensor(
+        tmp_mk_base_buffer, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), tmp_mk_base1_ne, tmp_mk_base1_nb,
+        GGML_MAX_DIMS - 3, ACL_FORMAT_ND);

     aclnn_fill_scalar(ctx, m0, tmp_mk_base1_tensor);

@@ -1899,23 +1917,25 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
     if (n_heads_log2_floor < ne2_ne3) {
         int64_t tmp_mk_base2_ne[] = {ne2_ne3 - n_heads_log2_floor};
         size_t tmp_mk_base2_nb[] = {sizeof(dst->type)};
-        aclTensor* tmp_mk_base2_tensor = create_acl_tensor(
+        aclTensor* tmp_mk_base2_tensor = ggml_cann_create_tensor(
             (char*)tmp_mk_base_buffer +
                 n_heads_log2_floor * ggml_type_size(dst->type),
-            type_mapping(dst->type), ggml_type_size(dst->type), tmp_mk_base2_ne,
-            tmp_mk_base2_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
+            ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type),
+            tmp_mk_base2_ne, tmp_mk_base2_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
         aclnn_fill_scalar(ctx, m1, tmp_mk_base2_tensor);
     }

     // init mk
     int64_t tmp_mk_base_ne[] = {ne2_ne3};
     size_t tmp_mk_base_nb[] = {sizeof(dst->type)};
-    aclTensor* tmp_mk_base_tensor = create_acl_tensor(
-        tmp_mk_base_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-        tmp_mk_base_ne, tmp_mk_base_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
-    aclTensor* tmp_arange_tensor = create_acl_tensor(
-        tmp_arange_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-        tmp_mk_base_ne, tmp_mk_base_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
+    aclTensor* tmp_mk_base_tensor = ggml_cann_create_tensor(
+        tmp_mk_base_buffer, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), tmp_mk_base_ne, tmp_mk_base_nb,
+        GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
+    aclTensor* tmp_arange_tensor = ggml_cann_create_tensor(
+        tmp_arange_buffer, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), tmp_mk_base_ne, tmp_mk_base_nb,
+        GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
     aclnn_pow_tensor_tensor(ctx, tmp_mk_base_tensor, tmp_arange_tensor);

     // reshape mk
@@ -1925,9 +1945,10 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
         tmp_mk_nb[i] = tmp_mk_nb[i - 1] * tmp_mk_ne[i - 1];
     }
-    aclTensor* tmp_mk_tensor = create_acl_tensor(
-        tmp_mk_base_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-        tmp_mk_ne, tmp_mk_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
+    aclTensor* tmp_mk_tensor = ggml_cann_create_tensor(
+        tmp_mk_base_buffer, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), tmp_mk_ne, tmp_mk_nb, GGML_MAX_DIMS,
+        ACL_FORMAT_ND);

     // acl_position * mk
     int64_t tmp_output_ne[] = {src_ne[0], src_ne[1], src_ne[2], src_ne[3]};
@@ -1938,9 +1959,10 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
     }
     ggml_cann_pool_alloc output_allocator(ctx.pool(), ggml_nbytes(dst));
     void* tmp_output_buffer = output_allocator.get();
-    aclTensor* tmp_output_tensor = create_acl_tensor(
-        tmp_output_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-        tmp_output_ne, tmp_output_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
+    aclTensor* tmp_output_tensor = ggml_cann_create_tensor(
+        tmp_output_buffer, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), tmp_output_ne, tmp_output_nb, GGML_MAX_DIMS,
+        ACL_FORMAT_ND);
     aclnn_mul(ctx, acl_position, tmp_mk_tensor, tmp_output_tensor);

     // add
@@ -2031,8 +2053,8 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src0 = dst->src[0];
     ggml_tensor* src1 = dst->src[1]; // mask

-    aclTensor* acl_src0 = create_acl_tensor(src0);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     float scale = 1.0f;
     float max_bias = 0.0f;
@@ -2046,7 +2068,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     size_t n_bytes = ggml_nbytes(src0);
     ggml_cann_pool_alloc mul_scale_allocator(ctx.pool(), n_bytes);
     void* input_mul_scale_buffer = mul_scale_allocator.get();
-    aclTensor* acl_input_mul_scale_tensor = create_acl_tensor(
+    aclTensor* acl_input_mul_scale_tensor = ggml_cann_create_tensor(
         input_mul_scale_buffer, ACL_FLOAT, ggml_type_size(src0->type), src0->ne,
         src0->nb, GGML_MAX_DIMS);

@@ -2069,18 +2091,15 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         }
         src1_fp32_allocator.alloc(n_bytes);
         void* src1_fp32_buffer = src1_fp32_allocator.get();
-        acl_src1_fp32_tensor = create_acl_tensor(src1_fp32_buffer,
-                                                 ACL_FLOAT,
-                                                 sizeof(float),
-                                                 src1->ne,
-                                                 src1_fp32_nb,
-                                                 GGML_MAX_DIMS);
-        aclTensor* acl_src1 = create_acl_tensor(src1);
+        acl_src1_fp32_tensor = ggml_cann_create_tensor(
+            src1_fp32_buffer, ACL_FLOAT, sizeof(float), src1->ne,
+            src1_fp32_nb, GGML_MAX_DIMS);
+        aclTensor* acl_src1 = ggml_cann_create_tensor(src1);
         aclnn_cast(ctx, acl_src1, acl_src1_fp32_tensor, ACL_FLOAT);

         ACL_CHECK(aclDestroyTensor(acl_src1));
     } else {
-        acl_src1_fp32_tensor = create_acl_tensor(src1);
+        acl_src1_fp32_tensor = ggml_cann_create_tensor(src1);
     }

     // broadcast the mask across rows, only use ne11 of ne01 in mask
@@ -2092,7 +2111,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         for (int i = 1; i < GGML_MAX_DIMS; i++) {
             tmp_mask_nb[i] = tmp_mask_nb[i - 1] * tmp_mask_ne[i - 1];
         }
-        tmp_mask_tensor = create_acl_tensor(
+        tmp_mask_tensor = ggml_cann_create_tensor(
             src1->data, ACL_FLOAT, sizeof(float), tmp_mask_ne, tmp_mask_nb,
             GGML_MAX_DIMS, ACL_FORMAT_ND);
     }
@@ -2104,7 +2123,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     n_bytes = ggml_nbytes(dst);
     ggml_cann_pool_alloc output_allocator(ctx.pool(), n_bytes);
     void* output_buffer = output_allocator.get();
-    aclTensor* alibi_output_tensor = create_acl_tensor(
+    aclTensor* alibi_output_tensor = ggml_cann_create_tensor(
         output_buffer, ACL_FLOAT, ggml_type_size(dst->type), dst->ne,
         dst->nb, GGML_MAX_DIMS);
     if (max_bias <= 0.0f) {
@@ -2116,18 +2135,16 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
             aclnn_add(ctx, acl_src1_fp32_tensor, acl_input_mul_scale_tensor,
                       alibi_output_tensor);
         }
-    }
-    else {
+    } else {
         // slope != 1.0
         if (tmp_mask_tensor) {
             aclnn_alibi(ctx, acl_input_mul_scale_tensor, tmp_mask_tensor,
-                        alibi_output_tensor, n_head, src0->ne, src_nb0, max_bias,
-                        dst);
-        }
-        else {
-            aclnn_alibi(ctx, acl_input_mul_scale_tensor, acl_src1_fp32_tensor,
-                        alibi_output_tensor, n_head, src0->ne, src_nb0, max_bias,
-                        dst);
+                        alibi_output_tensor, n_head, src0->ne, src_nb0,
+                        max_bias, dst);
+        } else {
+            aclnn_alibi(ctx, acl_input_mul_scale_tensor,
+                        acl_src1_fp32_tensor, alibi_output_tensor, n_head,
+                        src0->ne, src_nb0, max_bias, dst);
         }
     }

@@ -2277,7 +2294,8 @@ static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
         workspaceAddr = workspace_allocator.get();
     }

-    ACL_CHECK(aclnnMatmul(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    ACL_CHECK(
+        aclnnMatmul(workspaceAddr, workspaceSize, executor, ctx.stream()));
 }

 /**
@ -2310,10 +2328,10 @@ static void ggml_cann_mat_mul_fp(ggml_backend_cann_context& ctx,
|
|||
bcast_weight_nb[4], bcast_weight_nb[5]};
|
||||
|
||||
aclTensor* acl_weight_tensor =
|
||||
create_acl_tensor(weight, transpose_ne, transpose_nb, bcast_dims);
|
||||
ggml_cann_create_tensor(weight, transpose_ne, transpose_nb, bcast_dims);
|
||||
aclTensor* acl_input_tensor =
|
||||
create_acl_tensor(input, BCAST_MUL_MAT_PARAM(input));
|
||||
aclTensor* acl_dst = create_acl_tensor(dst, BCAST_MUL_MAT_PARAM(dst));
|
||||
ggml_cann_create_tensor(input, BCAST_MUL_MAT_PARAM(input));
|
||||
aclTensor* acl_dst = ggml_cann_create_tensor(dst, BCAST_MUL_MAT_PARAM(dst));
|
||||
aclnn_mat_mul(ctx, acl_input_tensor, acl_weight_tensor, acl_dst);
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_weight_tensor));
|
||||
|
@ -2364,7 +2382,7 @@ static void ggml_cann_mul_mat_q8_0(ggml_backend_cann_context& ctx,
|
|||
size_t input_stride = input_elem_size * src1->ne[0] * src1->ne[1];
|
||||
|
||||
if (src1->type != GGML_TYPE_F16) {
|
||||
aclTensor* acl_src1_tensor = create_acl_tensor(src1);
|
||||
aclTensor* acl_src1_tensor = ggml_cann_create_tensor(src1);
|
||||
ggml_cann_pool_alloc input_alloctor(
|
||||
ctx.pool(), ggml_nelements(src1) * input_elem_size);
|
||||
input_buffer = input_alloctor.get();
|
||||
|
@ -2376,9 +2394,9 @@ static void ggml_cann_mul_mat_q8_0(ggml_backend_cann_context& ctx,
|
|||
input_cast_nb[i] = input_cast_nb[i - 1] * input_cast_ne[i - 1];
|
||||
}
|
||||
|
||||
aclTensor* acl_input_tensor =
|
||||
create_acl_tensor(input_buffer, ACL_FLOAT16, input_elem_size,
|
||||
input_cast_ne, input_cast_nb, GGML_MAX_DIMS);
|
||||
aclTensor* acl_input_tensor = ggml_cann_create_tensor(
|
||||
input_buffer, ACL_FLOAT16, input_elem_size, input_cast_ne,
|
||||
input_cast_nb, GGML_MAX_DIMS);
|
||||
aclnn_cast(ctx, acl_src1_tensor, acl_input_tensor, ACL_FLOAT16);
|
||||
ACL_CHECK(aclDestroyTensor(acl_input_tensor));
|
||||
ACL_CHECK(aclDestroyTensor(acl_src1_tensor));
|
||||
|
@ -2408,16 +2426,16 @@ static void ggml_cann_mul_mat_q8_0(ggml_backend_cann_context& ctx,
|
|||
int64_t batch1 = n1 * src1->ne[2] + c1;
|
||||
int64_t batch0 = n0 * src0->ne[2] + c0;
|
||||
|
||||
aclTensor* acl_input_tensor = create_acl_tensor(
|
||||
aclTensor* acl_input_tensor = ggml_cann_create_tensor(
|
||||
(char*)input_buffer + batch1 * input_stride, ACL_FLOAT16,
|
||||
input_elem_size, input_ne, input_nb, 2);
|
||||
aclTensor* acl_weight_tensor = create_acl_tensor(
|
||||
aclTensor* acl_weight_tensor = ggml_cann_create_tensor(
|
||||
(char*)src0->data + batch0 * weight_stride, ACL_INT8,
|
||||
weight_elem_size, weight_ne, weight_nb, 2);
|
||||
aclTensor* acl_scale_tensor = create_acl_tensor(
|
||||
aclTensor* acl_scale_tensor = ggml_cann_create_tensor(
|
||||
scale_offset + batch0 * scale_stride, ACL_FLOAT16,
|
||||
scale_elem_size, scale_ne, scale_nb, 2);
|
||||
aclTensor* acl_output_tensor = create_acl_tensor(
|
||||
aclTensor* acl_output_tensor = ggml_cann_create_tensor(
|
||||
(char*)output_buffer + batch1 * output_stride, ACL_FLOAT16,
|
||||
output_elem_size, output_ne, output_nb, 2);
|
||||
|
||||
|
@ -2451,9 +2469,9 @@ static void ggml_cann_mul_mat_q8_0(ggml_backend_cann_context& ctx,
|
|||
}
|
||||
|
||||
aclTensor* acl_output_tensor =
|
||||
create_acl_tensor(output_buffer, ACL_FLOAT16, output_elem_size,
|
||||
output_cast_ne, output_cast_nb, GGML_MAX_DIMS);
|
||||
aclTensor* acl_dst_tensor = create_acl_tensor(dst);
|
||||
ggml_cann_create_tensor(output_buffer, ACL_FLOAT16, output_elem_size,
|
||||
output_cast_ne, output_cast_nb, GGML_MAX_DIMS);
|
||||
aclTensor* acl_dst_tensor = ggml_cann_create_tensor(dst);
|
||||
aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor, ACL_FLOAT);
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_output_tensor));
|
||||
|
@ -2575,8 +2593,8 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|||
arange_length * sizeof(float_t)};
|
||||
|
||||
aclTensor* acl_arange_tensor =
|
||||
create_acl_tensor(arange_buffer, ACL_FLOAT, sizeof(float_t), arange_ne,
|
||||
arange_nb, GGML_MAX_DIMS);
|
||||
ggml_cann_create_tensor(arange_buffer, ACL_FLOAT, sizeof(float_t),
|
||||
arange_ne, arange_nb, GGML_MAX_DIMS);
|
||||
float start = 0;
|
||||
float step = 1;
|
||||
float stop = src0->ne[0] / 2;
|
||||
|
@ -2604,9 +2622,9 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|||
size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t),
|
||||
sizeof(int32_t) * position_length,
|
||||
sizeof(int32_t) * position_length};
|
||||
aclTensor* acl_position_tensor = create_acl_tensor(
|
||||
src1->data, type_mapping(src1->type), ggml_type_size(src1->type),
|
||||
position_ne, position_nb, GGML_MAX_DIMS);
|
||||
aclTensor* acl_position_tensor = ggml_cann_create_tensor(
|
||||
src1->data, ggml_cann_type_mapping(src1->type),
|
||||
ggml_type_size(src1->type), position_ne, position_nb, GGML_MAX_DIMS);
|
||||
|
||||
// power * position
|
||||
int64_t theta_length = arange_length * position_length;
|
||||
|
@ -2620,10 +2638,10 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|||
theta_nb[i] = theta_nb[i - 1] * theta_ne[i - 1];
|
||||
}
|
||||
aclTensor* acl_theta_tensor =
|
||||
create_acl_tensor(theta_buffer, ACL_FLOAT, sizeof(float_t), theta_ne,
|
||||
theta_nb, GGML_MAX_DIMS);
|
||||
ggml_cann_create_tensor(theta_buffer, ACL_FLOAT, sizeof(float_t),
|
||||
theta_ne, theta_nb, GGML_MAX_DIMS);
|
||||
aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor,
|
||||
acl_theta_tensor);
|
||||
acl_theta_tensor);
|
||||
|
||||
// permute: [0,1,2,3]->[0,2,1,3]
|
||||
int64_t permute_ne[] = {arange_length, 1, position_length, 1};
|
||||
|
@ -2635,9 +2653,9 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|||
ggml_cann_pool_alloc permute_allocator(ctx.pool(),
|
||||
theta_length * sizeof(float_t));
|
||||
void* permute_buffer = permute_allocator.get();
|
||||
aclTensor* acl_permute_tensor =
|
||||
create_acl_tensor(permute_buffer, ACL_FLOAT, sizeof(float_t),
|
||||
permute_ne, permute_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
|
||||
aclTensor* acl_permute_tensor = ggml_cann_create_tensor(
|
||||
permute_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
|
||||
GGML_MAX_DIMS, ACL_FORMAT_ND);
|
||||
int64_t permute_dim[] = {0, 2, 1, 3};
|
||||
int64_t num_dims = 4;
|
||||
aclnn_permute(ctx, acl_theta_tensor, acl_permute_tensor, permute_dim,
|
||||
|
@@ -2647,17 +2665,17 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
    ggml_cann_pool_alloc sin_allocator(ctx.pool(),
                                       theta_length * sizeof(float_t));
    void* sin_buffer = sin_allocator.get();
    aclTensor* acl_sin_tensor =
        create_acl_tensor(sin_buffer, ACL_FLOAT, sizeof(float_t), permute_ne,
                          permute_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
    aclTensor* acl_sin_tensor = ggml_cann_create_tensor(
        sin_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
        GGML_MAX_DIMS, ACL_FORMAT_ND);
    aclnn_sin(ctx, acl_permute_tensor, acl_sin_tensor);

    ggml_cann_pool_alloc cos_allocator(ctx.pool(),
                                       theta_length * sizeof(float_t));
    void* cos_buffer = cos_allocator.get();
    aclTensor* acl_cos_tensor =
        create_acl_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t), permute_ne,
                          permute_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
    aclTensor* acl_cos_tensor = ggml_cann_create_tensor(
        cos_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
        GGML_MAX_DIMS, ACL_FORMAT_ND);
    aclnn_cos(ctx, acl_permute_tensor, acl_cos_tensor);

    // repeat
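Every aclnn_cache_init hunk above changes nothing but the helper name: a pool buffer is allocated, its shape and strides are described, and the buffer is wrapped with the now-prefixed helper. A minimal sketch of that recurring pattern, assuming the buffer-wrapping overload exactly as it is called above (the element count is a placeholder, not a value from this commit):

    // Sketch only: describe a pool buffer and wrap it as an aclTensor.
    int64_t n_elems = 64;  // placeholder element count
    ggml_cann_pool_alloc allocator(ctx.pool(), n_elems * sizeof(float_t));
    void* buffer = allocator.get();
    int64_t ne[] = {n_elems, 1, 1, 1};
    size_t nb[GGML_MAX_DIMS];
    nb[0] = sizeof(float_t);
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
        nb[i] = nb[i - 1] * ne[i - 1];
    }
    aclTensor* acl_t = ggml_cann_create_tensor(
        buffer, ACL_FLOAT, sizeof(float_t), ne, nb, GGML_MAX_DIMS);
    // ... run aclnn kernels on acl_t, then release the handle ...
    ACL_CHECK(aclDestroyTensor(acl_t));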
@@ -2742,11 +2760,11 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
        sin_reshape_nb[i] = sin_reshape_nb[i - 1] * sin_reshape_ne[i - 1];
    }
    aclTensor* acl_sin_reshape_tensor =
        create_acl_tensor(sin_buffer, ACL_FLOAT, sizeof(float_t),
                          sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
        ggml_cann_create_tensor(sin_buffer, ACL_FLOAT, sizeof(float_t),
                                sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
    aclTensor* acl_cos_reshape_tensor =
        create_acl_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t),
                          sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
        ggml_cann_create_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t),
                                sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
    aclnn_cache_init(ctx, dst, acl_cos_reshape_tensor, acl_sin_reshape_tensor,
                     theta_scale, is_neox);
@@ -2767,13 +2785,14 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
        input_roll_nb[i] = input_roll_nb[i - 1] * input_roll_ne[i - 1];
    }
    aclTensor* acl_input_roll_tensor =
        create_acl_tensor(input_roll_buffer, type_mapping(src0->type),
                          ggml_type_size(src0->type), input_roll_ne,
                          input_roll_nb, GGML_MAX_DIMS);
    aclTensor* acl_input_tensor = create_acl_tensor(
        src0->data, type_mapping(src0->type), ggml_type_size(src0->type),
        input_roll_ne, input_roll_nb, GGML_MAX_DIMS);
    aclTensor* acl_input_roll_tensor = ggml_cann_create_tensor(
        input_roll_buffer, ggml_cann_type_mapping(src0->type),
        ggml_type_size(src0->type), input_roll_ne, input_roll_nb,
        GGML_MAX_DIMS);
    aclTensor* acl_input_tensor = ggml_cann_create_tensor(
        src0->data, ggml_cann_type_mapping(src0->type),
        ggml_type_size(src0->type), input_roll_ne, input_roll_nb,
        GGML_MAX_DIMS);

    int64_t shifts[] = {1};
    int64_t dims[] = {3};
@@ -2806,10 +2825,10 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
        // roll input: [q0,q1,q2,...] ->
        // [q_half,q_half+1,...,q_end,q0,q1,...q_half-1]
        input_roll_buffer = roll_allocator.get();
        aclTensor* acl_input_roll_tensor = create_acl_tensor(
            input_roll_buffer, type_mapping(src0->type),
        aclTensor* acl_input_roll_tensor = ggml_cann_create_tensor(
            input_roll_buffer, ggml_cann_type_mapping(src0->type),
            ggml_type_size(src0->type), src0->ne, src0->nb, GGML_MAX_DIMS);
        aclTensor* acl_input_tensor = create_acl_tensor(src0);
        aclTensor* acl_input_tensor = ggml_cann_create_tensor(src0);

        int64_t shifts[] = {src0->ne[0] / 2};
        int64_t dims[] = {3};
@@ -2837,7 +2856,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
        first_half_nb[i] = first_half_nb[i - 1] * first_half_ne[i - 1];
    }
    aclTensor* acl_first_half_tensor = create_acl_tensor(
    aclTensor* acl_first_half_tensor = ggml_cann_create_tensor(
        minus_one_scale_buffer, ACL_FLOAT, sizeof(float_t), first_half_ne,
        first_half_nb, GGML_MAX_DIMS);
    bool inplace = true;
@@ -2858,19 +2877,19 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
        input_nb[i] = input_nb[i - 1] * src0->ne[i - 1];
    }
    aclTensor* acl_input_roll_mul_scale_tensor = create_acl_tensor(
        input_roll_mul_scale_buffer, type_mapping(src0->type),
    aclTensor* acl_input_roll_mul_scale_tensor = ggml_cann_create_tensor(
        input_roll_mul_scale_buffer, ggml_cann_type_mapping(src0->type),
        ggml_type_size(src0->type), src0->ne, input_nb, GGML_MAX_DIMS);
    aclTensor* acl_input_roll_reshape_tensor = ggml_cann_create_tensor(
        input_roll_buffer, ggml_cann_type_mapping(src0->type),
        ggml_type_size(src0->type), src0->ne, input_nb, GGML_MAX_DIMS);
    aclTensor* acl_input_roll_reshape_tensor = create_acl_tensor(
        input_roll_buffer, type_mapping(src0->type), ggml_type_size(src0->type),
        src0->ne, input_nb, GGML_MAX_DIMS);

    aclnn_mul(ctx, acl_input_roll_reshape_tensor,
              acl_minus_one_tensor, acl_input_roll_mul_scale_tensor);
    aclnn_mul(ctx, acl_input_roll_reshape_tensor, acl_minus_one_tensor,
              acl_input_roll_mul_scale_tensor);

    // output
    aclTensor* acl_src0 = create_acl_tensor(src0);
    aclTensor* acl_dst = create_acl_tensor(dst);
    aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
    void* output_fp32_buffer;
    if (src0->type == GGML_TYPE_F32) {
        aclnn_inplace_mul(ctx, acl_src0, acl_cos_reshape_tensor);
@@ -2887,26 +2906,25 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
        ggml_cann_pool_alloc fp32_allocator1(
            ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
        void* input_fp32_buffer1 = fp32_allocator1.get();
        aclTensor* input_fp32_tensor1 =
            create_acl_tensor(input_fp32_buffer1, ACL_FLOAT, sizeof(float_t),
                              dst->ne, input_fp32_nb, GGML_MAX_DIMS);
        aclTensor* input_fp32_tensor1 = ggml_cann_create_tensor(
            input_fp32_buffer1, ACL_FLOAT, sizeof(float_t), dst->ne,
            input_fp32_nb, GGML_MAX_DIMS);
        ggml_cann_pool_alloc fp32_allocator2(
            ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
        void* input_fp32_buffer2 = fp32_allocator2.get();
        aclTensor* input_fp32_tensor2 =
            create_acl_tensor(input_fp32_buffer2, ACL_FLOAT, sizeof(float_t),
                              dst->ne, input_fp32_nb, GGML_MAX_DIMS);
        aclTensor* input_fp32_tensor2 = ggml_cann_create_tensor(
            input_fp32_buffer2, ACL_FLOAT, sizeof(float_t), dst->ne,
            input_fp32_nb, GGML_MAX_DIMS);

        ggml_cann_pool_alloc fp32_allocator(
            ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
        output_fp32_buffer = fp32_allocator.get();
        aclTensor* output_fp32_tensor =
            create_acl_tensor(output_fp32_buffer, ACL_FLOAT, sizeof(float_t),
                              dst->ne, input_fp32_nb, GGML_MAX_DIMS);
        aclnn_mul(ctx, acl_src0, acl_cos_reshape_tensor,
                  input_fp32_tensor1);
        aclnn_mul(ctx, acl_input_roll_mul_scale_tensor,
                  acl_sin_reshape_tensor, input_fp32_tensor2);
        aclTensor* output_fp32_tensor = ggml_cann_create_tensor(
            output_fp32_buffer, ACL_FLOAT, sizeof(float_t), dst->ne,
            input_fp32_nb, GGML_MAX_DIMS);
        aclnn_mul(ctx, acl_src0, acl_cos_reshape_tensor, input_fp32_tensor1);
        aclnn_mul(ctx, acl_input_roll_mul_scale_tensor, acl_sin_reshape_tensor,
                  input_fp32_tensor2);
        aclnn_add(ctx, input_fp32_tensor1, input_fp32_tensor2,
                  output_fp32_tensor);
        aclnn_cast(ctx, output_fp32_tensor, acl_dst, ACL_FLOAT16);
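The F16 branch above is functionally identical before and after this hunk; only the tensor-creation calls are renamed and the aclnn_mul argument lists are reflowed. What it computes is, roughly, dst = cast_f16(src0 * cos_cache + rolled_scaled_src0 * sin_cache): the two products land in the float32 scratch tensors input_fp32_tensor1 and input_fp32_tensor2, aclnn_add combines them into output_fp32_tensor, and aclnn_cast writes the result back to dst as ACL_FLOAT16.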
@@ -497,15 +497,15 @@ void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    aclTensor* acl_dst;

    // Need bcast
    if (!ggml_are_same_shape(src0, src1) && need_bcast(src0, src1)) {
    if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
        BCAST_SHAPE(src0, src1)
        acl_src0 = create_acl_tensor(src0, BCAST_PARAM(src0));
        acl_src1 = create_acl_tensor(src1, BCAST_PARAM(src1));
        acl_dst = create_acl_tensor(dst, BCAST_PARAM(src0));
        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
        acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
    } else {
        acl_src0 = create_acl_tensor(src0);
        acl_src1 = create_acl_tensor(src1);
        acl_dst = create_acl_tensor(dst);
        acl_src0 = ggml_cann_create_tensor(src0);
        acl_src1 = ggml_cann_create_tensor(src1);
        acl_dst = ggml_cann_create_tensor(dst);
    }

    uint64_t workspaceSize = 0;
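After the renamed tensor creation, the unchanged context lines (workspaceSize, executor) point at the usual two-phase aclnn launch that consumes these handles. A rough sketch of that flow, using aclnnMul as a stand-in: the GetWorkspaceSize/execute pair, ACL_CHECK around aclnn calls, and ctx.stream() are assumptions about the CANN aclnn API and the backend context, not something this hunk shows.

    // Sketch only: two-phase launch of an aclnn binary op on the views above.
    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    // Phase 1: query the required workspace size and build an executor.
    ACL_CHECK(aclnnMulGetWorkspaceSize(acl_src0, acl_src1, acl_dst,
                                       &workspaceSize, &executor));
    // Phase 2: allocate the workspace from the pool and launch on the stream.
    // For brevity the sketch always allocates; a real implementation would
    // likely skip the allocation when workspaceSize == 0.
    ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
    ACL_CHECK(aclnnMul(workspace_allocator.get(), workspaceSize, executor,
                       ctx.stream()));
    ACL_CHECK(aclDestroyTensor(acl_src0));
    ACL_CHECK(aclDestroyTensor(acl_src1));
    ACL_CHECK(aclDestroyTensor(acl_dst));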
@@ -538,8 +538,8 @@ void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    GGML_ASSERT(src->type == GGML_TYPE_F32);
    GGML_ASSERT(dst->type == GGML_TYPE_F32);

    aclTensor* acl_src = create_acl_tensor(src);
    aclTensor* acl_dst = create_acl_tensor(dst);
    aclTensor* acl_src = ggml_cann_create_tensor(src);
    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
@@ -569,8 +569,8 @@ void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    GGML_ASSERT(src->type == GGML_TYPE_F32);
    GGML_ASSERT(dst->type == GGML_TYPE_F32);

    aclTensor* acl_src = create_acl_tensor(src);
    aclTensor* acl_dst = create_acl_tensor(dst);
    aclTensor* acl_src = ggml_cann_create_tensor(src);
    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
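Taken together, these hunks apply one mechanical rename at every call site: type_mapping becomes ggml_cann_type_mapping, create_acl_tensor becomes ggml_cann_create_tensor, and need_bcast becomes ggml_cann_need_bcast. Arguments and behavior are untouched; the only other differences are line reflows caused by the longer names.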