add ggml_cann prefix for acl funcs

parent 96e09b979d
commit 57197b74b0

4 changed files with 302 additions and 284 deletions
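Note for reviewers: this is a mechanical rename plus the line re-wrapping the longer names force. Every ACL helper in the CANN backend gains a ggml_cann_ prefix: type_mapping -> ggml_cann_type_mapping, create_acl_tensor -> ggml_cann_create_tensor, need_bcast -> ggml_cann_need_bcast, get_bcast_shape -> ggml_cann_get_bcast_shape, get_mul_mat_bcast_shape -> ggml_cann_get_mulmat_bcast_shape. As orientation, here is a minimal sketch of a call site after the rename; it mirrors the ggml_cann_add() hunk further down and is not part of the patch itself. The tail of the real function (workspace sizing and aclDestroyTensor cleanup) is omitted, and the declarations are assumed to be the ones shown in the header hunks of this diff:

    // Sketch only: mirrors the post-rename ggml_cann_add() call-site pattern
    // from this diff; surrounding ggml/ACL declarations are assumed.
    void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
        ggml_tensor* src0 = dst->src[0];
        ggml_tensor* src1 = dst->src[1];

        aclTensor* acl_src0;
        aclTensor* acl_src1;
        aclTensor* acl_dst;

        // Broadcast only when shapes differ and src1 actually needs expanding.
        if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
            // BCAST_SHAPE declares bcast_src0_*/bcast_src1_* arrays plus bcast_dims;
            // BCAST_PARAM(t) expands to bcast_t_ne, bcast_t_nb, bcast_dims, i.e. the
            // ne/nb/dims arguments of ggml_cann_create_tensor.
            BCAST_SHAPE(src0, src1)
            acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
            acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
            acl_dst  = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
        } else {
            acl_src0 = ggml_cann_create_tensor(src0);
            acl_src1 = ggml_cann_create_tensor(src1);
            acl_dst  = ggml_cann_create_tensor(dst);
        }

        aclnn_add(ctx, acl_src0, acl_src1, acl_dst);
        // ... workspace sizing and tensor destruction omitted in this sketch
    }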
@@ -25,7 +25,7 @@
 #include <algorithm>
 #include <cstring>

-aclDataType type_mapping(ggml_type type) {
+aclDataType ggml_cann_type_mapping(ggml_type type) {
     switch (type) {
         case GGML_TYPE_F32:
             return ACL_FLOAT;
@@ -43,8 +43,9 @@ aclDataType type_mapping(ggml_type type) {
     return ACL_DT_UNDEFINED;
 }

-aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne, size_t* nb,
-                             int64_t dims, aclFormat format, size_t offset) {
+aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
+                                   size_t* nb, int64_t dims, aclFormat format,
+                                   size_t offset) {
     // If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
     // added.
     int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
@@ -71,15 +72,15 @@ aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne, size_t* nb,
     std::reverse(acl_ne, acl_ne + final_dims);
     std::reverse(acl_stride, acl_stride + final_dims);

-    aclTensor* acl_tensor =
-        aclCreateTensor(acl_ne, final_dims, type_mapping(tensor->type),
-                        acl_stride, offset / ggml_element_size(tensor), format,
-                        &acl_storage_len, 1, tensor->data);
+    aclTensor* acl_tensor = aclCreateTensor(
+        acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
+        offset / ggml_element_size(tensor), format, &acl_storage_len, 1,
+        tensor->data);

     return acl_tensor;
 }

-bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
+bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
         if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
             return true;
@@ -88,9 +89,10 @@ bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
     return false;
 }

-aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
+aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
                              size_t type_size, int64_t* ne, size_t* nb,
-                             int64_t dims, aclFormat format, size_t offset) {
+                             int64_t dims, aclFormat format,
+                             size_t offset) {
     int64_t tmp_ne[GGML_MAX_DIMS * 2];
     int64_t tmp_stride[GGML_MAX_DIMS * 2];

@@ -114,9 +116,11 @@ aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
     return acl_tensor;
 }

-int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
-                        int64_t* bcast_src0_ne, int64_t* bcast_src1_ne,
-                        size_t* bcast_src0_nb, size_t* bcast_src1_nb) {
+int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
+                                  const ggml_tensor* src1,
+                                  int64_t* bcast_src0_ne,
+                                  int64_t* bcast_src1_ne, size_t* bcast_src0_nb,
+                                  size_t* bcast_src1_nb) {
     GGML_ASSERT(ggml_can_repeat(src1, src0));
     int bcast_dim_cnt = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
@@ -140,13 +144,11 @@ int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
     return bcast_dim_cnt;
 }

-int64_t get_mul_mat_bcast_shape(const int64_t* input_ne,
-                                const int64_t* weight_ne, const int64_t* dst_ne,
-                                const size_t* input_nb, const size_t* weight_nb,
-                                const size_t* dst_nb, int64_t* bcast_input_ne,
-                                int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
-                                size_t* bcast_input_nb, size_t* bcast_weight_nb,
-                                size_t* bcast_dst_nb) {
+int64_t ggml_cann_get_mulmat_bcast_shape(
+    const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
+    const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
+    int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
+    size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb) {
     // input and dst shoule in same shape, except first two dims.
     GGML_ASSERT(input_ne[2] == dst_ne[2]);
     GGML_ASSERT(input_ne[3] == dst_ne[3]);
@@ -38,7 +38,7 @@
  * @return The corresponding aclDataType. If the input type is not recognized,
  * ACL_DT_UNDEFINED is returned.
  */
-aclDataType type_mapping(ggml_type type);
+aclDataType ggml_cann_type_mapping(ggml_type type);

 /**
  * @brief Creates an ACL tensor from a ggml_tensor with optional shape.
@@ -59,7 +59,7 @@ aclDataType type_mapping(ggml_type type);
  * @param offset Offset in bytes for the ACL tensor data. Defaults to 0.
  * @return Pointer to the created ACL tensor.
  */
-aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr,
+aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr,
                              size_t* nb = nullptr, int64_t dims = 0,
                              aclFormat format = ACL_FORMAT_ND,
                              size_t offset = 0);
@@ -83,7 +83,7 @@ aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr,
  * @param offset Offset in bytes for the ACL tensor data. Defaults to 0.
  * @return Pointer to the created ACL tensor.
  */
-aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
+aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
                              size_t type_size, int64_t* ne, size_t* nb,
                              int64_t dims, aclFormat format = ACL_FORMAT_ND,
                              size_t offset = 0);
@@ -104,7 +104,7 @@ aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
  * to 1. If such a dimension is found, broadcasting is required to align t1
  * with t0 for element-wise operations.
  */
-bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1);
+bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1);

 /**
  * @brief Computes broadcast shapes and strides for two ggml_tensors.
@@ -159,19 +159,19 @@ bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1);
  * dim1 in a inserted dim, should add nb for dim1,
  * and all other nb moves to next in order.
  */
-int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
+int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
                         int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
                         size_t* bcast_nb_src0, size_t* bcast_nb_src1);

 // Bcast macro to avoid duplicate code.
 #define BCAST_SHAPE(src0, src1) \
     int64_t bcast_##src0##_ne[GGML_MAX_DIMS * 2]; \
     int64_t bcast_##src1##_ne[GGML_MAX_DIMS * 2]; \
     size_t bcast_##src0##_nb[GGML_MAX_DIMS * 2]; \
     size_t bcast_##src1##_nb[GGML_MAX_DIMS * 2]; \
-    int64_t bcast_dims = \
-        get_bcast_shape(src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, \
-                        bcast_##src0##_nb, bcast_##src1##_nb);
+    int64_t bcast_dims = ggml_cann_get_bcast_shape( \
+        src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, bcast_##src0##_nb, \
+        bcast_##src1##_nb);

 #define BCAST_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims

@@ -201,17 +201,15 @@ int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
  * shapes needed for matrix multiplication. It ensures that dimensions where
  * weight tensor requires expansion are appropriately handled to conform with
  * broadcasting rules.
- * @note compare with get_bcast_shape,mul_mat broadcast need add this new dim before
- * cast dim.
- * @sa get_bcast_shape
+ * @note compare with ggml_cann_get_bcast_shape,mul_mat broadcast need add this new dim
+ * before cast dim.
+ * @sa ggml_cann_get_bcast_shape
  */
-int64_t get_mul_mat_bcast_shape(const int64_t* input_ne,
-                                const int64_t* weight_ne, const int64_t* dst_ne,
-                                const size_t* input_nb, const size_t* weight_nb,
-                                const size_t* dst_nb, int64_t* bcast_input_ne,
-                                int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
-                                size_t* bcast_input_nb, size_t* bcast_weight_nb,
-                                size_t* bcast_dst_nb);
+int64_t ggml_cann_get_mulmat_bcast_shape(
+    const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
+    const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
+    int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
+    size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb);

 // Bcast macro to avoid duplicate code.
 #define BCAST_MUL_MAT_SHAPE(input, weight, dst) \
@@ -221,7 +219,7 @@ int64_t get_mul_mat_bcast_shape(const int64_t* input_ne,
     size_t bcast_##input##_nb[GGML_MAX_DIMS * 2]; \
     size_t bcast_##weight##_nb[GGML_MAX_DIMS * 2]; \
     size_t bcast_##dst##_nb[GGML_MAX_DIMS * 2]; \
-    int64_t bcast_dims = get_mul_mat_bcast_shape( \
+    int64_t bcast_dims = ggml_cann_get_mulmat_bcast_shape( \
         input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, \
         bcast_##input##_ne, bcast_##weight##_ne, bcast_##dst##_ne, \
         bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb);
@@ -98,8 +98,8 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
     GGML_ASSERT(ggml_can_repeat(src, dst));

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     int64_t repeatsArray[] = {dst->ne[3] / src->ne[3], dst->ne[2] / src->ne[2],
                               dst->ne[1] / src->ne[1], dst->ne[0] / src->ne[0]};
@@ -156,15 +156,15 @@ void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     aclTensor* acl_dst;

     // Need bcast
-    if (!ggml_are_same_shape(src0, src1) && need_bcast(src0, src1)) {
+    if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
         BCAST_SHAPE(src0, src1)
-        acl_src0 = create_acl_tensor(src0, BCAST_PARAM(src0));
-        acl_src1 = create_acl_tensor(src1, BCAST_PARAM(src1));
-        acl_dst = create_acl_tensor(dst, BCAST_PARAM(src0));
+        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
+        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
+        acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
     } else {
-        acl_src0 = create_acl_tensor(src0);
-        acl_src1 = create_acl_tensor(src1);
-        acl_dst = create_acl_tensor(dst);
+        acl_src0 = ggml_cann_create_tensor(src0);
+        acl_src1 = ggml_cann_create_tensor(src1);
+        acl_dst = ggml_cann_create_tensor(dst);
     }

     aclnn_add(ctx, acl_src0, acl_src1, acl_dst);
@@ -180,8 +180,8 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     GGML_ASSERT(src->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     float negative_slope;
     memcpy(&negative_slope, dst->op_params, sizeof(float));
@@ -237,9 +237,9 @@ static void aclnn_concat(ggml_backend_cann_context& ctx,
 void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src0 = dst->src[0];
     ggml_tensor* src1 = dst->src[1];
-    aclTensor* acl_src0 = create_acl_tensor(src0);
-    aclTensor* acl_src1 = create_acl_tensor(src1);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
+    aclTensor* acl_src1 = ggml_cann_create_tensor(src1);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     int64_t concat_dim = 1;
     aclTensor* tensors[] = {acl_src0, acl_src1};
@@ -299,7 +299,7 @@ static void aclnn_arange(ggml_backend_cann_context& ctx, aclTensor* acl_dst,
 void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     int64_t n_elements = ggml_nelements(dst);
     float start;
@@ -328,8 +328,8 @@ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     memcpy(&min, dst->op_params, sizeof(float));
     memcpy(&max, (float*)dst->op_params + 1, sizeof(float));

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     aclScalar* acl_min = aclCreateScalar(&min, aclDataType::ACL_FLOAT);
     aclScalar* acl_max = aclCreateScalar(&max, aclDataType::ACL_FLOAT);
@@ -361,8 +361,8 @@ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     memcpy(&v, dst->op_params, sizeof(float));

     aclScalar* scale = aclCreateScalar(&v, aclDataType::ACL_FLOAT);
-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
@@ -386,14 +386,14 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
     enum ggml_sort_order order = (enum ggml_sort_order)dst->op_params[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
     ggml_cann_pool_alloc temp_buffer_allocator(
         ctx.pool(), ggml_nelements(dst) * sizeof(int64_t));
     void* buffer = temp_buffer_allocator.get();
     aclTensor* tmp_tensor =
-        create_acl_tensor(buffer, ACL_INT64, ggml_type_size(dst->type), dst->ne,
-                          dst->nb, GGML_MAX_DIMS);
+        ggml_cann_create_tensor(buffer, ACL_INT64, ggml_type_size(dst->type),
+                                dst->ne, dst->nb, GGML_MAX_DIMS);

     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
@@ -411,7 +411,8 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         aclnnArgsort(workspaceAddr, workspaceSize, executor, ctx.stream()));

     workspaceSize = 0;
-    ACL_CHECK(aclnnCastGetWorkspaceSize(tmp_tensor, type_mapping(dst->type),
+    ACL_CHECK(aclnnCastGetWorkspaceSize(tmp_tensor,
+                                        ggml_cann_type_mapping(dst->type),
                                         acl_dst, &workspaceSize, &executor));
     if (workspaceSize > 0) {
         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
@@ -428,8 +429,8 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     float eps;
     memcpy(&eps, dst->op_params, sizeof(float));
@@ -460,8 +461,8 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     const float eps = 1e-6f; // TODO: make this a parameter
     int n_groups = dst->op_params[0];
@@ -481,9 +482,9 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {

     ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), n_bytes * 2);
     void* buffer = temp_buffer_allocator.get();
-    aclTensor* acl_mean_out =
-        create_acl_tensor(buffer, ACL_FLOAT, type_size, ne, nb, ACL_FORMAT_ND);
-    aclTensor* acl_rstd_out = create_acl_tensor(
+    aclTensor* acl_mean_out = ggml_cann_create_tensor(
+        buffer, ACL_FLOAT, type_size, ne, nb, ACL_FORMAT_ND);
+    aclTensor* acl_rstd_out = ggml_cann_create_tensor(
         (char*)buffer + n_bytes, ACL_FLOAT, type_size, ne, nb, ACL_FORMAT_ND);

     ACL_CHECK(aclnnGroupNormGetWorkspaceSize(
@@ -516,9 +517,9 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {

     size_t param_nb[] = {ggml_element_size(src0), nb1, nb2, nb3};

-    aclTensor* acl_dst = create_acl_tensor(
+    aclTensor* acl_dst = ggml_cann_create_tensor(
         dst, src1->ne, param_nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
-    aclTensor* acl_src1 = create_acl_tensor(src1);
+    aclTensor* acl_src1 = ggml_cann_create_tensor(src1);

     aclScalar* alpha = nullptr;
     float alphaValue = 1.0f;
@@ -532,7 +533,7 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         size_t cpy_size = ggml_nbytes(dst);
         ACL_CHECK(aclrtMemcpyAsync(dst->data, cpy_size, src0->data, cpy_size,
                                    ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream()));
-        aclTensor* acl_src0 = create_acl_tensor(
+        aclTensor* acl_src0 = ggml_cann_create_tensor(
             src0, src1->ne, src0->nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
         ACL_CHECK(aclnnAddGetWorkspaceSize(acl_src0, acl_src1, alpha, acl_dst,
                                            &workspaceSize, &executor));
@@ -561,10 +562,10 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);

     GGML_ASSERT(dst->ne[0] == 1);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     int64_t reduce_dims_host[] = {3};
     aclIntArray* reduce_dims = aclCreateIntArray(reduce_dims_host, 1);
@@ -573,9 +574,9 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     aclOpExecutor* executor;
     void* workspaceAddr = nullptr;

-    ACL_CHECK(aclnnReduceSumGetWorkspaceSize(acl_src, reduce_dims, true,
-                                             type_mapping(src->type), acl_dst,
-                                             &workspaceSize, &executor));
+    ACL_CHECK(aclnnReduceSumGetWorkspaceSize(
+        acl_src, reduce_dims, true, ggml_cann_type_mapping(src->type), acl_dst,
+        &workspaceSize, &executor));
     if (workspaceSize > 0) {
         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
         workspaceAddr = workspace_allocator.get();
@@ -592,9 +593,9 @@ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
                                   ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
     aclTensor* acl_src =
-        create_acl_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
     aclTensor* acl_dst =
-        create_acl_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);

     std::vector<int64_t> output_size{dst->ne[1], dst->ne[0]};
     auto output_size_array = aclCreateIntArray(output_size.data(), 2);
@@ -659,8 +660,8 @@ static void aclnn_pad(ggml_backend_cann_context& ctx, aclTensor* acl_src,

 void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     // padding: value in the array means how much distance will be padding.
     // the position of elements in the array means which dirction to padding,
@@ -694,9 +695,9 @@ static void ggml_cann_avg_pool2d(ggml_backend_cann_context& ctx,
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     aclTensor* acl_src =
-        create_acl_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
     aclTensor* acl_dst =
-        create_acl_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);

     const int32_t* opts = (const int32_t*)dst->op_params;
     const int k0 = opts[1];
@@ -732,7 +733,8 @@ static void ggml_cann_avg_pool2d(ggml_backend_cann_context& ctx,
         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
         workspaceAddr = workspace_allocator.get();
     }
-    ACL_CHECK(aclnnAvgPool2d(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    ACL_CHECK(
+        aclnnAvgPool2d(workspaceAddr, workspaceSize, executor, ctx.stream()));

     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
@@ -760,9 +762,9 @@ static void ggml_cann_max_pool2d(ggml_backend_cann_context& ctx,
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     aclTensor* acl_src =
-        create_acl_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
     aclTensor* acl_dst =
-        create_acl_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+        ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);

     const int32_t* opts = (const int32_t*)dst->op_params;
     const int k0 = opts[1];
@@ -784,9 +786,9 @@ static void ggml_cann_max_pool2d(ggml_backend_cann_context& ctx,
     ggml_cann_pool_alloc temp_buffer_allocator(
         ctx.pool(), ggml_nbytes(src) + p0 * 2 + p1 * 2 * src->nb[1]);
     void* buffer = temp_buffer_allocator.get();
-    aclTensor* tmp_tensor =
-        create_acl_tensor(buffer, ACL_FLOAT, ggml_element_size(src), temp_ne,
-                          temp_nb, GGML_MAX_DIMS, ACL_FORMAT_NCHW);
+    aclTensor* tmp_tensor = ggml_cann_create_tensor(
+        buffer, ACL_FLOAT, ggml_element_size(src), temp_ne, temp_nb,
+        GGML_MAX_DIMS, ACL_FORMAT_NCHW);

     // pad: see padding in ggml_cann_pad()
     int64_t paddings[] = {p0, p0, p1, p1, 0, 0, 0, 0};
@@ -819,7 +821,8 @@ static void ggml_cann_max_pool2d(ggml_backend_cann_context& ctx,
         workspaceAddr = workspace_allocator.get();
     }

-    ACL_CHECK(aclnnMaxPool(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    ACL_CHECK(
+        aclnnMaxPool(workspaceAddr, workspaceSize, executor, ctx.stream()));

     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
@@ -870,14 +873,15 @@ static void cann_copy(ggml_backend_cann_context& ctx, aclTensor* acl_src,
         workspaceAddr = workspace_allocator.get();
     }

-    ACL_CHECK(aclnnInplaceCopy(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    ACL_CHECK(
+        aclnnInplaceCopy(workspaceAddr, workspaceSize, executor, ctx.stream()));
 }

 void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     ggml_cann_pool_alloc src_extra_allocator(ctx.pool(), sizeof(ggml_tensor));
     ggml_cann_pool_alloc dst_extra_allocator(ctx.pool(), sizeof(ggml_tensor));
@@ -891,7 +895,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                                ctx.stream()));

     if ((dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32) &&
         ggml_are_same_shape(src, dst)) {
         cann_copy(ctx, acl_src, acl_dst);
         ACL_CHECK(aclDestroyTensor(acl_src));
         ACL_CHECK(aclDestroyTensor(acl_dst));
@@ -1070,7 +1074,8 @@ static aclTensor* aclnn_zero(ggml_backend_cann_context& ctx, void* buffer,
     }

     ACL_CHECK(aclrtMemsetAsync(buffer, n_bytes, 0, n_bytes, ctx.stream()));
-    aclTensor* zero = create_acl_tensor(buffer, type, type_size, ne, nb, dims);
+    aclTensor* zero =
+        ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims);
     return zero;
 }

@@ -1122,8 +1127,8 @@ static aclTensor* aclnn_ones(ggml_backend_cann_context& ctx, void* buffer,
 void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     float eps;
     memcpy(&eps, dst->op_params, sizeof(float));
@@ -1137,16 +1142,17 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     size_t one_tensor_n_bytes = src->ne[0] * ggml_element_size(src);
     ggml_cann_pool_alloc one_tensor_allocator(ctx.pool(), one_tensor_n_bytes);

-    aclTensor* acl_gamma =
-        aclnn_ones(ctx, one_tensor_allocator.get(), one_tensor_n_bytes, src->ne,
-                   1, type_mapping(src->type), ggml_element_size(src));
+    aclTensor* acl_gamma = aclnn_ones(
+        ctx, one_tensor_allocator.get(), one_tensor_n_bytes, src->ne, 1,
+        ggml_cann_type_mapping(src->type), ggml_element_size(src));

     size_t zero_tensor_n_bytes =
         src->ne[1] * src->ne[2] * src->ne[3] * ggml_element_size(src);
     ggml_cann_pool_alloc zero_tensor_allocator(ctx.pool(), zero_tensor_n_bytes);
-    aclTensor* acl_rstd = aclnn_zero(
-        ctx, zero_tensor_allocator.get(), zero_tensor_n_bytes, src->ne,
-        GGML_MAX_DIMS, type_mapping(src->type), ggml_element_size(src));
+    aclTensor* acl_rstd =
+        aclnn_zero(ctx, zero_tensor_allocator.get(), zero_tensor_n_bytes,
+                   src->ne, GGML_MAX_DIMS, ggml_cann_type_mapping(src->type),
+                   ggml_element_size(src));

     ACL_CHECK(aclnnRmsNormGetWorkspaceSize(
         acl_src, acl_gamma, eps, acl_dst, acl_rstd, &workspaceSize, &executor));
@@ -1170,8 +1176,8 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst,
                          float value) {
     ggml_tensor* src = dst->src[0];

-    aclTensor* acl_src = create_acl_tensor(src);
-    aclTensor* acl_dst = create_acl_tensor(dst);
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

     const int n_past = ((int32_t*)dst->op_params)[0];

@@ -1179,9 +1185,10 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst,
                                src->ne[3] * ggml_element_size(src);
     ggml_cann_pool_alloc one_tensor_allocator(ctx.pool(), one_tensor_n_bytes);

-    aclTensor* mask_tensor = aclnn_ones(
-        ctx, one_tensor_allocator.get(), one_tensor_n_bytes, src->ne,
-        GGML_MAX_DIMS, type_mapping(src->type), ggml_element_size(src), value);
+    aclTensor* mask_tensor =
+        aclnn_ones(ctx, one_tensor_allocator.get(), one_tensor_n_bytes, src->ne,
+                   GGML_MAX_DIMS, ggml_cann_type_mapping(src->type),
+                   ggml_element_size(src), value);

     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
@@ -1336,7 +1343,7 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     GGML_ASSERT(nb10 == sizeof(float));

     // im2col: [N,C,H,W] -> [N, IC * KH * KW, OW * OH]
-    aclTensor* acl_src1 = create_acl_tensor(src1);
+    aclTensor* acl_src1 = ggml_cann_create_tensor(src1);
     int64_t tmp_im2col_ne[] = {OW * OH, IC * KH * KW, N};
     size_t tmp_im2col_nb[GGML_MAX_DIMS - 1];

@@ -1351,9 +1358,10 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_cann_pool_alloc im2col_allocator(
         ctx.pool(), ggml_nelements(dst) * ggml_element_size(src1));
     void* tmp_im2col_buffer = im2col_allocator.get();
-    aclTensor* tmp_im2col_tensor = create_acl_tensor(
-        tmp_im2col_buffer, type_mapping(src1->type), ggml_type_size(src1->type),
-        tmp_im2col_ne, tmp_im2col_nb, GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
+    aclTensor* tmp_im2col_tensor = ggml_cann_create_tensor(
+        tmp_im2col_buffer, ggml_cann_type_mapping(src1->type),
+        ggml_type_size(src1->type), tmp_im2col_ne, tmp_im2col_nb,
+        GGML_MAX_DIMS - 1, ACL_FORMAT_ND);

     std::vector<int64_t> kernel_dims = {KH, KW};
     std::vector<int64_t> dilation_size = {d1, d0};
@@ -1377,7 +1385,8 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         workspaceAddr = workspace_allocator.get();
     }

-    ACL_CHECK(aclnnIm2col(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    ACL_CHECK(
+        aclnnIm2col(workspaceAddr, workspaceSize, executor, ctx.stream()));

     // Cast if dst is f16.
     aclTensor* tmp_cast_tensor = nullptr;
@@ -1391,18 +1400,19 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
             temp_cast_nb[i] = temp_cast_nb[i - 1] * tmp_im2col_ne[i - 1];
         }

-        tmp_cast_tensor = create_acl_tensor(
-            tmp_cast_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-            tmp_im2col_ne, temp_cast_nb, GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
+        tmp_cast_tensor = ggml_cann_create_tensor(
+            tmp_cast_buffer, ggml_cann_type_mapping(dst->type),
+            ggml_type_size(dst->type), tmp_im2col_ne, temp_cast_nb,
+            GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
         aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor,
-                   type_mapping(dst->type));
+                   ggml_cann_type_mapping(dst->type));
     }

     // Permute: [N, IC * KH * KW, OW * OH] -> [N, OW * OH, IC * KH * KW]
     int64_t dst_ne[] = {dst->ne[0], dst->ne[1] * dst->ne[2], dst->ne[3]};
     size_t dst_nb[] = {dst->nb[0], dst->nb[1], dst->nb[3]};
     aclTensor* acl_dst =
-        create_acl_tensor(dst, dst_ne, dst_nb, GGML_MAX_DIMS - 1);
+        ggml_cann_create_tensor(dst, dst_ne, dst_nb, GGML_MAX_DIMS - 1);

     int64_t permute_dim[] = {0, 2, 1};
     if (src1->type != dst->type) {
@ -1517,7 +1527,8 @@ static void aclnn_muls(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
||||||
* \f]
|
* \f]
|
||||||
*
|
*
|
||||||
* @param ctx The context for the CANN backend operations.
|
* @param ctx The context for the CANN backend operations.
|
||||||
* @param acl_src The source tensor where the multiplication result will be stored.
|
* @param acl_src The source tensor where the multiplication result will be
|
||||||
|
* stored.
|
||||||
* @param acl_other The tensor whose elements will be multiplied with `acl_src`.
|
* @param acl_other The tensor whose elements will be multiplied with `acl_src`.
|
||||||
*/
|
*/
|
||||||
static void aclnn_inplace_mul(ggml_backend_cann_context& ctx,
|
static void aclnn_inplace_mul(ggml_backend_cann_context& ctx,
|
||||||
|
@ -1553,9 +1564,8 @@ static void aclnn_inplace_mul(ggml_backend_cann_context& ctx,
|
||||||
* @param acl_other The second tensor for element-wise multiplication.
|
* @param acl_other The second tensor for element-wise multiplication.
|
||||||
* @param acl_dst The destination tensor where the result will be stored.
|
* @param acl_dst The destination tensor where the result will be stored.
|
||||||
*/
|
*/
|
||||||
static void aclnn_mul(ggml_backend_cann_context& ctx,
|
static void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
||||||
aclTensor* acl_src, aclTensor* acl_other,
|
aclTensor* acl_other, aclTensor* acl_dst) {
|
||||||
aclTensor* acl_dst) {
|
|
||||||
uint64_t workspaceSize = 0;
|
uint64_t workspaceSize = 0;
|
||||||
aclOpExecutor* executor;
|
aclOpExecutor* executor;
|
||||||
void* workspaceAddr = nullptr;
|
void* workspaceAddr = nullptr;
|
||||||
|
@ -1573,16 +1583,16 @@ static void aclnn_mul(ggml_backend_cann_context& ctx,
|
||||||
/**
|
/**
|
||||||
* @brief Applies element-wise cosine function to the elements of a tensor.
|
* @brief Applies element-wise cosine function to the elements of a tensor.
|
||||||
*
|
*
|
||||||
* This function computes the cosine of each element in the source tensor `acl_src`
|
* This function computes the cosine of each element in the source tensor
|
||||||
* and stores the result in the destination tensor `acl_dst`.
|
* `acl_src` and stores the result in the destination tensor `acl_dst`. The
|
||||||
* The operation is defined as:
|
* operation is defined as: \f[ \text {acl_dst }_i=\cos \left(\text {acl_src
|
||||||
* \f[
|
* }_i\right) \f]
|
||||||
* \text {acl_dst }_i=\cos \left(\text {acl_src }_i\right)
|
|
||||||
* \f]
|
|
||||||
*
|
*
|
||||||
* @param ctx The context for the CANN backend operations.
|
* @param ctx The context for the CANN backend operations.
|
||||||
* @param acl_src The source tensor on which the cosine function will be applied.
|
* @param acl_src The source tensor on which the cosine function will be
|
||||||
* @param acl_dst The destination tensor where the cosine results will be stored.
|
* applied.
|
||||||
|
* @param acl_dst The destination tensor where the cosine results will be
|
||||||
|
* stored.
|
||||||
*/
|
*/
|
||||||
static void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
static void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
||||||
aclTensor* acl_dst) {
|
aclTensor* acl_dst) {
|
||||||
|
@ -1603,7 +1613,8 @@ static void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
||||||
/**
|
/**
|
||||||
* @brief Applies element-wise sine function to the elements of a tensor.
|
* @brief Applies element-wise sine function to the elements of a tensor.
|
||||||
*
|
*
|
||||||
* This function computes the sine of each element in the source tensor `acl_src`
|
* This function computes the sine of each element in the source tensor
|
||||||
|
`acl_src`
|
||||||
* and stores the result in the destination tensor `acl_dst`.
|
* and stores the result in the destination tensor `acl_dst`.
|
||||||
* The operation is defined as:
|
* The operation is defined as:
|
||||||
* \f[
|
* \f[
|
||||||
|
@ -1641,7 +1652,7 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
|
||||||
const int max_period = dst->op_params[1];
|
const int max_period = dst->op_params[1];
|
||||||
int half = dim / 2;
|
int half = dim / 2;
|
||||||
|
|
||||||
aclTensor* acl_src = create_acl_tensor(src);
|
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
||||||
|
|
||||||
// arange: [0, ..., half)
|
// arange: [0, ..., half)
|
||||||
float start = 0;
|
float start = 0;
|
||||||
|
@ -1653,9 +1664,10 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
|
||||||
|
|
||||||
ggml_cann_pool_alloc arange_allocator(ctx.pool(), half * sizeof(dst->type));
|
ggml_cann_pool_alloc arange_allocator(ctx.pool(), half * sizeof(dst->type));
|
||||||
void* tmp_arange_buffer = arange_allocator.get();
|
void* tmp_arange_buffer = arange_allocator.get();
|
||||||
aclTensor* tmp_arange_tensor = create_acl_tensor(
|
aclTensor* tmp_arange_tensor = ggml_cann_create_tensor(
|
||||||
tmp_arange_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
|
tmp_arange_buffer, ggml_cann_type_mapping(dst->type),
|
||||||
tmp_arange_ne, tmp_arange_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
|
ggml_type_size(dst->type), tmp_arange_ne, tmp_arange_nb,
|
||||||
|
GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
|
||||||
|
|
||||||
aclnn_arange(ctx, tmp_arange_tensor, start, stop, step, n_elements_arange);
|
aclnn_arange(ctx, tmp_arange_tensor, start, stop, step, n_elements_arange);
|
||||||
|
|
||||||
|
@ -1675,9 +1687,10 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
|
||||||
|
|
||||||
ggml_cann_pool_alloc permute_allocator(ctx.pool(), ggml_nbytes(src));
|
ggml_cann_pool_alloc permute_allocator(ctx.pool(), ggml_nbytes(src));
|
||||||
void* tmp_permute_buffer = permute_allocator.get();
|
void* tmp_permute_buffer = permute_allocator.get();
|
||||||
aclTensor* tmp_permute_tenosr = create_acl_tensor(
|
aclTensor* tmp_permute_tenosr = ggml_cann_create_tensor(
|
||||||
tmp_permute_buffer, type_mapping(src->type), ggml_type_size(src->type),
|
tmp_permute_buffer, ggml_cann_type_mapping(src->type),
|
||||||
tmp_permute_ne, tmp_permute_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
|
ggml_type_size(src->type), tmp_permute_ne, tmp_permute_nb,
|
||||||
|
GGML_MAX_DIMS, ACL_FORMAT_ND);
|
||||||
int64_t permute_dim[] = {0, 1, 3, 2};
|
int64_t permute_dim[] = {0, 1, 3, 2};
|
||||||
int64_t num_dims = 4;
|
int64_t num_dims = 4;
|
||||||
aclnn_permute(ctx, acl_src, tmp_permute_tenosr, permute_dim, num_dims);
|
aclnn_permute(ctx, acl_src, tmp_permute_tenosr, permute_dim, num_dims);
|
||||||
|
@ -1697,19 +1710,20 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
|
||||||
ggml_cann_pool_alloc mul_allocator(
|
ggml_cann_pool_alloc mul_allocator(
|
||||||
ctx.pool(), mul_nelements * ggml_type_size(src->type));
|
ctx.pool(), mul_nelements * ggml_type_size(src->type));
|
||||||
void* tmp_mul_buffer = mul_allocator.get();
|
void* tmp_mul_buffer = mul_allocator.get();
|
||||||
aclTensor* tmp_mul_tensor = create_acl_tensor(
|
aclTensor* tmp_mul_tensor = ggml_cann_create_tensor(
|
||||||
tmp_mul_buffer, type_mapping(src->type), ggml_type_size(src->type),
|
tmp_mul_buffer, ggml_cann_type_mapping(src->type),
|
||||||
tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
|
ggml_type_size(src->type), tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS,
|
||||||
aclnn_mul(ctx, tmp_permute_tenosr, tmp_arange_tensor,
|
ACL_FORMAT_ND);
|
||||||
tmp_mul_tensor);
|
aclnn_mul(ctx, tmp_permute_tenosr, tmp_arange_tensor, tmp_mul_tensor);
|
||||||
|
|
||||||
// cos
|
// cos
|
||||||
ggml_cann_pool_alloc cos_allocator(
|
ggml_cann_pool_alloc cos_allocator(
|
||||||
ctx.pool(), mul_nelements * ggml_type_size(src->type));
|
ctx.pool(), mul_nelements * ggml_type_size(src->type));
|
||||||
void* tmp_cos_buffer = cos_allocator.get();
|
void* tmp_cos_buffer = cos_allocator.get();
|
||||||
aclTensor* tmp_cos_tensor = create_acl_tensor(
|
aclTensor* tmp_cos_tensor = ggml_cann_create_tensor(
|
||||||
tmp_cos_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
|
tmp_cos_buffer, ggml_cann_type_mapping(dst->type),
|
||||||
tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
|
ggml_type_size(dst->type), tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS,
|
||||||
|
ACL_FORMAT_ND);
|
||||||
|
|
||||||
aclnn_cos(ctx, tmp_mul_tensor, tmp_cos_tensor);
|
aclnn_cos(ctx, tmp_mul_tensor, tmp_cos_tensor);
|
||||||
|
|
||||||
|
@ -1717,15 +1731,16 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
|
||||||
ggml_cann_pool_alloc sin_allocator(
|
ggml_cann_pool_alloc sin_allocator(
|
||||||
ctx.pool(), mul_nelements * ggml_type_size(src->type));
|
ctx.pool(), mul_nelements * ggml_type_size(src->type));
|
||||||
void* tmp_sin_buffer = sin_allocator.get();
|
void* tmp_sin_buffer = sin_allocator.get();
|
||||||
aclTensor* tmp_sin_tensor = create_acl_tensor(
|
aclTensor* tmp_sin_tensor = ggml_cann_create_tensor(
|
||||||
tmp_sin_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
|
tmp_sin_buffer, ggml_cann_type_mapping(dst->type),
|
||||||
tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
|
ggml_type_size(dst->type), tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS,
|
||||||
|
ACL_FORMAT_ND);
|
||||||
|
|
||||||
aclnn_sin(ctx, tmp_mul_tensor, tmp_sin_tensor);
|
aclnn_sin(ctx, tmp_mul_tensor, tmp_sin_tensor);
|
||||||
|
|
||||||
// concat
|
// concat
|
||||||
int64_t concat_dim = 3;
|
int64_t concat_dim = 3;
|
||||||
aclTensor* acl_dst = create_acl_tensor(dst);
|
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
||||||
aclTensor* tensors[] = {tmp_cos_tensor, tmp_sin_tensor};
|
aclTensor* tensors[] = {tmp_cos_tensor, tmp_sin_tensor};
|
||||||
aclTensorList* tensorList = aclCreateTensorList(tensors, 2);
|
aclTensorList* tensorList = aclCreateTensorList(tensors, 2);
|
||||||
aclnn_concat(ctx, tensorList, acl_dst, concat_dim);
|
aclnn_concat(ctx, tensorList, acl_dst, concat_dim);
|
||||||
|
@ -1816,7 +1831,8 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
|
||||||
* @param acl_dst The destination tensor where the result will be stored.
|
* @param acl_dst The destination tensor where the result will be stored.
|
||||||
* @param n_head The number of attention heads.
|
* @param n_head The number of attention heads.
|
||||||
* @param src_ne The dimensions of the source tensor.
|
* @param src_ne The dimensions of the source tensor.
|
||||||
* @param src_nb0 The byte size of the first dimension of the source tensor.
|
* @param src_nb0 The byte size of the first dimension of the source
|
||||||
|
tensor.
|
||||||
* @param max_bias The maximum bias value used in the Alibi mechanism.
|
* @param max_bias The maximum bias value used in the Alibi mechanism.
|
||||||
* @param dst The destination tensor object for additional metadata.
|
* @param dst The destination tensor object for additional metadata.
|
||||||
*
|
*
|
||||||
|
@ -1858,9 +1874,10 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
||||||
|
|
||||||
int64_t tmp_arange1_ne[] = {n_heads_log2_floor};
|
int64_t tmp_arange1_ne[] = {n_heads_log2_floor};
|
||||||
size_t tmp_arange1_nb[] = {sizeof(dst->type)};
|
size_t tmp_arange1_nb[] = {sizeof(dst->type)};
|
||||||
aclTensor* tmp_arange1_tensor = create_acl_tensor(
|
aclTensor* tmp_arange1_tensor = ggml_cann_create_tensor(
|
||||||
tmp_arange_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
|
tmp_arange_buffer, ggml_cann_type_mapping(dst->type),
|
||||||
tmp_arange1_ne, tmp_arange1_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
|
ggml_type_size(dst->type), tmp_arange1_ne, tmp_arange1_nb,
|
||||||
|
GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
|
||||||
|
|
||||||
aclnn_arange(ctx, tmp_arange1_tensor, start, stop, step, n_elements_arange);
|
aclnn_arange(ctx, tmp_arange1_tensor, start, stop, step, n_elements_arange);
|
||||||
|
|
||||||
|
@ -1874,11 +1891,11 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
||||||
int64_t tmp_arange2_ne[] = {ne2_ne3 - n_heads_log2_floor};
|
int64_t tmp_arange2_ne[] = {ne2_ne3 - n_heads_log2_floor};
|
||||||
size_t tmp_arange2_nb[] = {sizeof(dst->type)};
|
size_t tmp_arange2_nb[] = {sizeof(dst->type)};
|
||||||
|
|
||||||
aclTensor* tmp_arange2_tensor = create_acl_tensor(
|
aclTensor* tmp_arange2_tensor = ggml_cann_create_tensor(
|
||||||
(char*)tmp_arange_buffer +
|
(char*)tmp_arange_buffer +
|
||||||
n_heads_log2_floor * ggml_type_size(dst->type),
|
n_heads_log2_floor * ggml_type_size(dst->type),
|
||||||
type_mapping(dst->type), ggml_type_size(dst->type), tmp_arange2_ne,
|
ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type),
|
||||||
tmp_arange2_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
|
tmp_arange2_ne, tmp_arange2_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
|
||||||
aclnn_arange(ctx, tmp_arange2_tensor, start, stop, step,
|
aclnn_arange(ctx, tmp_arange2_tensor, start, stop, step,
|
||||||
n_elements_arange);
|
n_elements_arange);
|
||||||
}
|
}
|
||||||
|
@ -1889,9 +1906,10 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
||||||
void* tmp_mk_base_buffer = mk_base_allocator.get();
|
void* tmp_mk_base_buffer = mk_base_allocator.get();
|
||||||
int64_t tmp_mk_base1_ne[] = {n_heads_log2_floor};
|
int64_t tmp_mk_base1_ne[] = {n_heads_log2_floor};
|
||||||
size_t tmp_mk_base1_nb[] = {sizeof(dst->type)};
|
size_t tmp_mk_base1_nb[] = {sizeof(dst->type)};
|
||||||
aclTensor* tmp_mk_base1_tensor = create_acl_tensor(
|
aclTensor* tmp_mk_base1_tensor = ggml_cann_create_tensor(
|
||||||
tmp_mk_base_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
|
tmp_mk_base_buffer, ggml_cann_type_mapping(dst->type),
|
||||||
tmp_mk_base1_ne, tmp_mk_base1_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
|
ggml_type_size(dst->type), tmp_mk_base1_ne, tmp_mk_base1_nb,
|
||||||
|
GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
|
||||||
|
|
||||||
aclnn_fill_scalar(ctx, m0, tmp_mk_base1_tensor);
|
aclnn_fill_scalar(ctx, m0, tmp_mk_base1_tensor);
|
||||||
|
|
||||||
|
@@ -1899,23 +1917,25 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 if (n_heads_log2_floor < ne2_ne3) {
 int64_t tmp_arange2_ne[] = {ne2_ne3 - n_heads_log2_floor};
 size_t tmp_mk_base2_nb[] = {sizeof(dst->type)};
-aclTensor* tmp_mk_base2_tensor = create_acl_tensor(
+aclTensor* tmp_mk_base2_tensor = ggml_cann_create_tensor(
 (char*)tmp_mk_base_buffer +
 n_heads_log2_floor * ggml_type_size(dst->type),
-type_mapping(dst->type), ggml_type_size(dst->type), tmp_mk_base2_ne,
-tmp_mk_base2_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
+ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type),
+tmp_mk_base2_ne, tmp_mk_base2_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
 aclnn_fill_scalar(ctx, m1, tmp_mk_base2_tensor);
 }
 
 // init mk
 int64_t tmp_mk_base_ne[] = {ne2_ne3};
 size_t tmp_mk_base_nb[] = {sizeof(dst->type)};
-aclTensor* tmp_mk_base_tensor = create_acl_tensor(
-tmp_mk_base_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-tmp_mk_base_ne, tmp_mk_base_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
-aclTensor* tmp_arange_tensor = create_acl_tensor(
-tmp_arange_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-tmp_mk_base_ne, tmp_mk_base_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
+aclTensor* tmp_mk_base_tensor = ggml_cann_create_tensor(
+tmp_mk_base_buffer, ggml_cann_type_mapping(dst->type),
+ggml_type_size(dst->type), tmp_mk_base_ne, tmp_mk_base_nb,
+GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
+aclTensor* tmp_arange_tensor = ggml_cann_create_tensor(
+tmp_arange_buffer, ggml_cann_type_mapping(dst->type),
+ggml_type_size(dst->type), tmp_mk_base_ne, tmp_mk_base_nb,
+GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
 aclnn_pow_tensor_tensor(ctx, tmp_mk_base_tensor, tmp_arange_tensor);
 
 // reshape mk
@@ -1925,9 +1945,10 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 for (int i = 1; i < GGML_MAX_DIMS; i++) {
 tmp_mk_nb[i] = tmp_mk_nb[i - 1] * tmp_mk_ne[i - 1];
 }
-aclTensor* tmp_mk_tensor = create_acl_tensor(
-tmp_mk_base_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-tmp_mk_ne, tmp_mk_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
+aclTensor* tmp_mk_tensor = ggml_cann_create_tensor(
+tmp_mk_base_buffer, ggml_cann_type_mapping(dst->type),
+ggml_type_size(dst->type), tmp_mk_ne, tmp_mk_nb, GGML_MAX_DIMS,
+ACL_FORMAT_ND);
 
 // acl_position * mk
 int64_t tmp_output_ne[] = {src_ne[0], src_ne[1], src_ne[2], src_ne[3]};
@@ -1938,9 +1959,10 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 }
 ggml_cann_pool_alloc output_allocator(ctx.pool(), ggml_nbytes(dst));
 void* tmp_output_buffer = output_allocator.get();
-aclTensor* tmp_output_tensor = create_acl_tensor(
-tmp_output_buffer, type_mapping(dst->type), ggml_type_size(dst->type),
-tmp_output_ne, tmp_output_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
+aclTensor* tmp_output_tensor = ggml_cann_create_tensor(
+tmp_output_buffer, ggml_cann_type_mapping(dst->type),
+ggml_type_size(dst->type), tmp_output_ne, tmp_output_nb, GGML_MAX_DIMS,
+ACL_FORMAT_ND);
 aclnn_mul(ctx, acl_position, tmp_mk_tensor, tmp_output_tensor);
 
 // add
@@ -2031,8 +2053,8 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 ggml_tensor* src0 = dst->src[0];
 ggml_tensor* src1 = dst->src[1]; // mask
 
-aclTensor* acl_src0 = create_acl_tensor(src0);
-aclTensor* acl_dst = create_acl_tensor(dst);
+aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
+aclTensor* acl_dst = ggml_cann_create_tensor(dst);
 
 float scale = 1.0f;
 float max_bias = 0.0f;
|
||||||
size_t n_bytes = ggml_nbytes(src0);
|
size_t n_bytes = ggml_nbytes(src0);
|
||||||
ggml_cann_pool_alloc mul_scale_allocator(ctx.pool(), n_bytes);
|
ggml_cann_pool_alloc mul_scale_allocator(ctx.pool(), n_bytes);
|
||||||
void* input_mul_scale_buffer = mul_scale_allocator.get();
|
void* input_mul_scale_buffer = mul_scale_allocator.get();
|
||||||
aclTensor* acl_input_mul_scale_tensor = create_acl_tensor(
|
aclTensor* acl_input_mul_scale_tensor = ggml_cann_create_tensor(
|
||||||
input_mul_scale_buffer, ACL_FLOAT, ggml_type_size(src0->type), src0->ne,
|
input_mul_scale_buffer, ACL_FLOAT, ggml_type_size(src0->type), src0->ne,
|
||||||
src0->nb, GGML_MAX_DIMS);
|
src0->nb, GGML_MAX_DIMS);
|
||||||
|
|
||||||
|
@@ -2069,18 +2091,15 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 }
 src1_fp32_allocator.alloc(n_bytes);
 void* src1_fp32_buffer = src1_fp32_allocator.get();
-acl_src1_fp32_tensor = create_acl_tensor(src1_fp32_buffer,
-ACL_FLOAT,
-sizeof(float),
-src1->ne,
-src1_fp32_nb,
-GGML_MAX_DIMS);
-aclTensor* acl_src1 = create_acl_tensor(src1);
+acl_src1_fp32_tensor = ggml_cann_create_tensor(
+src1_fp32_buffer, ACL_FLOAT, sizeof(float), src1->ne,
+src1_fp32_nb, GGML_MAX_DIMS);
+aclTensor* acl_src1 = ggml_cann_create_tensor(src1);
 aclnn_cast(ctx, acl_src1, acl_src1_fp32_tensor, ACL_FLOAT);
 
 ACL_CHECK(aclDestroyTensor(acl_src1));
 } else {
-acl_src1_fp32_tensor = create_acl_tensor(src1);
+acl_src1_fp32_tensor = ggml_cann_create_tensor(src1);
 }
 
 // broadcast the mask across rows, only use ne11 of ne01 in mask
@@ -2092,7 +2111,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 for (int i = 1; i < GGML_MAX_DIMS; i++) {
 tmp_mask_nb[i] = tmp_mask_nb[i - 1] * tmp_mask_ne[i - 1];
 }
-tmp_mask_tensor = create_acl_tensor(
+tmp_mask_tensor = ggml_cann_create_tensor(
 src1->data, ACL_FLOAT, sizeof(float), tmp_mask_ne, tmp_mask_nb,
 GGML_MAX_DIMS, ACL_FORMAT_ND);
 }
@@ -2104,7 +2123,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 n_bytes = ggml_nbytes(dst);
 ggml_cann_pool_alloc output_allocator(ctx.pool(), n_bytes);
 void* output_buffer = output_allocator.get();
-aclTensor* alibi_output_tensor = create_acl_tensor(
+aclTensor* alibi_output_tensor = ggml_cann_create_tensor(
 output_buffer, ACL_FLOAT, ggml_type_size(dst->type), dst->ne,
 dst->nb, GGML_MAX_DIMS);
 if (max_bias <= 0.0f) {
@@ -2116,18 +2135,16 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 aclnn_add(ctx, acl_src1_fp32_tensor, acl_input_mul_scale_tensor,
 alibi_output_tensor);
 }
-}
-else {
+} else {
 // slope != 1.0
 if (tmp_mask_tensor) {
 aclnn_alibi(ctx, acl_input_mul_scale_tensor, tmp_mask_tensor,
-alibi_output_tensor, n_head, src0->ne, src_nb0, max_bias,
-dst);
-}
-else {
-aclnn_alibi(ctx, acl_input_mul_scale_tensor, acl_src1_fp32_tensor,
-alibi_output_tensor, n_head, src0->ne, src_nb0, max_bias,
-dst);
+alibi_output_tensor, n_head, src0->ne, src_nb0,
+max_bias, dst);
+} else {
+aclnn_alibi(ctx, acl_input_mul_scale_tensor,
+acl_src1_fp32_tensor, alibi_output_tensor, n_head,
+src0->ne, src_nb0, max_bias, dst);
 }
 }
 
@@ -2277,7 +2294,8 @@ static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
 workspaceAddr = workspace_allocator.get();
 }
 
-ACL_CHECK(aclnnMatmul(workspaceAddr, workspaceSize, executor, ctx.stream()));
+ACL_CHECK(
+aclnnMatmul(workspaceAddr, workspaceSize, executor, ctx.stream()));
 }
 
 /**
@@ -2310,10 +2328,10 @@ static void ggml_cann_mat_mul_fp(ggml_backend_cann_context& ctx,
 bcast_weight_nb[4], bcast_weight_nb[5]};
 
 aclTensor* acl_weight_tensor =
-create_acl_tensor(weight, transpose_ne, transpose_nb, bcast_dims);
+ggml_cann_create_tensor(weight, transpose_ne, transpose_nb, bcast_dims);
 aclTensor* acl_input_tensor =
-create_acl_tensor(input, BCAST_MUL_MAT_PARAM(input));
-aclTensor* acl_dst = create_acl_tensor(dst, BCAST_MUL_MAT_PARAM(dst));
+ggml_cann_create_tensor(input, BCAST_MUL_MAT_PARAM(input));
+aclTensor* acl_dst = ggml_cann_create_tensor(dst, BCAST_MUL_MAT_PARAM(dst));
 aclnn_mat_mul(ctx, acl_input_tensor, acl_weight_tensor, acl_dst);
 
 ACL_CHECK(aclDestroyTensor(acl_weight_tensor));
@@ -2364,7 +2382,7 @@ static void ggml_cann_mul_mat_q8_0(ggml_backend_cann_context& ctx,
 size_t input_stride = input_elem_size * src1->ne[0] * src1->ne[1];
 
 if (src1->type != GGML_TYPE_F16) {
-aclTensor* acl_src1_tensor = create_acl_tensor(src1);
+aclTensor* acl_src1_tensor = ggml_cann_create_tensor(src1);
 ggml_cann_pool_alloc input_alloctor(
 ctx.pool(), ggml_nelements(src1) * input_elem_size);
 input_buffer = input_alloctor.get();
@@ -2376,9 +2394,9 @@ static void ggml_cann_mul_mat_q8_0(ggml_backend_cann_context& ctx,
 input_cast_nb[i] = input_cast_nb[i - 1] * input_cast_ne[i - 1];
 }
 
-aclTensor* acl_input_tensor =
-create_acl_tensor(input_buffer, ACL_FLOAT16, input_elem_size,
-input_cast_ne, input_cast_nb, GGML_MAX_DIMS);
+aclTensor* acl_input_tensor = ggml_cann_create_tensor(
+input_buffer, ACL_FLOAT16, input_elem_size, input_cast_ne,
+input_cast_nb, GGML_MAX_DIMS);
 aclnn_cast(ctx, acl_src1_tensor, acl_input_tensor, ACL_FLOAT16);
 ACL_CHECK(aclDestroyTensor(acl_input_tensor));
 ACL_CHECK(aclDestroyTensor(acl_src1_tensor));
@@ -2408,16 +2426,16 @@ static void ggml_cann_mul_mat_q8_0(ggml_backend_cann_context& ctx,
 int64_t batch1 = n1 * src1->ne[2] + c1;
 int64_t batch0 = n0 * src0->ne[2] + c0;
 
-aclTensor* acl_input_tensor = create_acl_tensor(
+aclTensor* acl_input_tensor = ggml_cann_create_tensor(
 (char*)input_buffer + batch1 * input_stride, ACL_FLOAT16,
 input_elem_size, input_ne, input_nb, 2);
-aclTensor* acl_weight_tensor = create_acl_tensor(
+aclTensor* acl_weight_tensor = ggml_cann_create_tensor(
 (char*)src0->data + batch0 * weight_stride, ACL_INT8,
 weight_elem_size, weight_ne, weight_nb, 2);
-aclTensor* acl_scale_tensor = create_acl_tensor(
+aclTensor* acl_scale_tensor = ggml_cann_create_tensor(
 scale_offset + batch0 * scale_stride, ACL_FLOAT16,
 scale_elem_size, scale_ne, scale_nb, 2);
-aclTensor* acl_output_tensor = create_acl_tensor(
+aclTensor* acl_output_tensor = ggml_cann_create_tensor(
 (char*)output_buffer + batch1 * output_stride, ACL_FLOAT16,
 output_elem_size, output_ne, output_nb, 2);
 
@@ -2451,9 +2469,9 @@ static void ggml_cann_mul_mat_q8_0(ggml_backend_cann_context& ctx,
 }
 
 aclTensor* acl_output_tensor =
-create_acl_tensor(output_buffer, ACL_FLOAT16, output_elem_size,
+ggml_cann_create_tensor(output_buffer, ACL_FLOAT16, output_elem_size,
 output_cast_ne, output_cast_nb, GGML_MAX_DIMS);
-aclTensor* acl_dst_tensor = create_acl_tensor(dst);
+aclTensor* acl_dst_tensor = ggml_cann_create_tensor(dst);
 aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor, ACL_FLOAT);
 
 ACL_CHECK(aclDestroyTensor(acl_output_tensor));
@@ -2575,8 +2593,8 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
 arange_length * sizeof(float_t)};
 
 aclTensor* acl_arange_tensor =
-create_acl_tensor(arange_buffer, ACL_FLOAT, sizeof(float_t), arange_ne,
-arange_nb, GGML_MAX_DIMS);
+ggml_cann_create_tensor(arange_buffer, ACL_FLOAT, sizeof(float_t),
+arange_ne, arange_nb, GGML_MAX_DIMS);
 float start = 0;
 float step = 1;
 float stop = src0->ne[0] / 2;
@@ -2604,9 +2622,9 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
 size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t),
 sizeof(int32_t) * position_length,
 sizeof(int32_t) * position_length};
-aclTensor* acl_position_tensor = create_acl_tensor(
-src1->data, type_mapping(src1->type), ggml_type_size(src1->type),
-position_ne, position_nb, GGML_MAX_DIMS);
+aclTensor* acl_position_tensor = ggml_cann_create_tensor(
+src1->data, ggml_cann_type_mapping(src1->type),
+ggml_type_size(src1->type), position_ne, position_nb, GGML_MAX_DIMS);
 
 // power * position
 int64_t theta_length = arange_length * position_length;
@@ -2620,10 +2638,10 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
 theta_nb[i] = theta_nb[i - 1] * theta_ne[i - 1];
 }
 aclTensor* acl_theta_tensor =
-create_acl_tensor(theta_buffer, ACL_FLOAT, sizeof(float_t), theta_ne,
-theta_nb, GGML_MAX_DIMS);
+ggml_cann_create_tensor(theta_buffer, ACL_FLOAT, sizeof(float_t),
+theta_ne, theta_nb, GGML_MAX_DIMS);
 aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor,
 acl_theta_tensor);
 
 // permute: [0,1,2,3]->[0,2,1,3]
 int64_t permute_ne[] = {arange_length, 1, position_length, 1};
@@ -2635,9 +2653,9 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
 ggml_cann_pool_alloc permute_allocator(ctx.pool(),
 theta_length * sizeof(float_t));
 void* permute_buffer = permute_allocator.get();
-aclTensor* acl_permute_tensor =
-create_acl_tensor(permute_buffer, ACL_FLOAT, sizeof(float_t),
-permute_ne, permute_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
+aclTensor* acl_permute_tensor = ggml_cann_create_tensor(
+permute_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+GGML_MAX_DIMS, ACL_FORMAT_ND);
 int64_t permute_dim[] = {0, 2, 1, 3};
 int64_t num_dims = 4;
 aclnn_permute(ctx, acl_theta_tensor, acl_permute_tensor, permute_dim,
@@ -2647,17 +2665,17 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
 ggml_cann_pool_alloc sin_allocator(ctx.pool(),
 theta_length * sizeof(float_t));
 void* sin_buffer = sin_allocator.get();
-aclTensor* acl_sin_tensor =
-create_acl_tensor(sin_buffer, ACL_FLOAT, sizeof(float_t), permute_ne,
-permute_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
+aclTensor* acl_sin_tensor = ggml_cann_create_tensor(
+sin_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+GGML_MAX_DIMS, ACL_FORMAT_ND);
 aclnn_sin(ctx, acl_permute_tensor, acl_sin_tensor);
 
 ggml_cann_pool_alloc cos_allocator(ctx.pool(),
 theta_length * sizeof(float_t));
 void* cos_buffer = cos_allocator.get();
-aclTensor* acl_cos_tensor =
-create_acl_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t), permute_ne,
-permute_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
+aclTensor* acl_cos_tensor = ggml_cann_create_tensor(
+cos_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+GGML_MAX_DIMS, ACL_FORMAT_ND);
 aclnn_cos(ctx, acl_permute_tensor, acl_cos_tensor);
 
 // repeat
@@ -2742,11 +2760,11 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 sin_reshape_nb[i] = sin_reshape_nb[i - 1] * sin_reshape_ne[i - 1];
 }
 aclTensor* acl_sin_reshape_tensor =
-create_acl_tensor(sin_buffer, ACL_FLOAT, sizeof(float_t),
+ggml_cann_create_tensor(sin_buffer, ACL_FLOAT, sizeof(float_t),
 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
 aclTensor* acl_cos_reshape_tensor =
-create_acl_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t),
+ggml_cann_create_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t),
 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
 aclnn_cache_init(ctx, dst, acl_cos_reshape_tensor, acl_sin_reshape_tensor,
 theta_scale, is_neox);
 
@@ -2767,13 +2785,14 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 for (int i = 1; i < GGML_MAX_DIMS; i++) {
 input_roll_nb[i] = input_roll_nb[i - 1] * input_roll_ne[i - 1];
 }
-aclTensor* acl_input_roll_tensor =
-create_acl_tensor(input_roll_buffer, type_mapping(src0->type),
-ggml_type_size(src0->type), input_roll_ne,
-input_roll_nb, GGML_MAX_DIMS);
-aclTensor* acl_input_tensor = create_acl_tensor(
-src0->data, type_mapping(src0->type), ggml_type_size(src0->type),
-input_roll_ne, input_roll_nb, GGML_MAX_DIMS);
+aclTensor* acl_input_roll_tensor = ggml_cann_create_tensor(
+input_roll_buffer, ggml_cann_type_mapping(src0->type),
+ggml_type_size(src0->type), input_roll_ne, input_roll_nb,
+GGML_MAX_DIMS);
+aclTensor* acl_input_tensor = ggml_cann_create_tensor(
+src0->data, ggml_cann_type_mapping(src0->type),
+ggml_type_size(src0->type), input_roll_ne, input_roll_nb,
+GGML_MAX_DIMS);
 
 int64_t shifts[] = {1};
 int64_t dims[] = {3};
@@ -2806,10 +2825,10 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 // roll input: [q0,q1,q2,...] ->
 // [q_half,q_half+1,...,q_end,q0,q1,...q_half-1]
 input_roll_buffer = roll_allocator.get();
-aclTensor* acl_input_roll_tensor = create_acl_tensor(
-input_roll_buffer, type_mapping(src0->type),
+aclTensor* acl_input_roll_tensor = ggml_cann_create_tensor(
+input_roll_buffer, ggml_cann_type_mapping(src0->type),
 ggml_type_size(src0->type), src0->ne, src0->nb, GGML_MAX_DIMS);
-aclTensor* acl_input_tensor = create_acl_tensor(src0);
+aclTensor* acl_input_tensor = ggml_cann_create_tensor(src0);
 
 int64_t shifts[] = {src0->ne[0] / 2};
 int64_t dims[] = {3};
@@ -2837,7 +2856,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 for (int i = 1; i < GGML_MAX_DIMS; i++) {
 first_half_nb[i] = first_half_nb[i - 1] * first_half_ne[i - 1];
 }
-aclTensor* acl_first_half_tensor = create_acl_tensor(
+aclTensor* acl_first_half_tensor = ggml_cann_create_tensor(
 minus_one_scale_buffer, ACL_FLOAT, sizeof(float_t), first_half_ne,
 first_half_nb, GGML_MAX_DIMS);
 bool inplace = true;
@@ -2858,19 +2877,19 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 for (int i = 1; i < GGML_MAX_DIMS; i++) {
 input_nb[i] = input_nb[i - 1] * src0->ne[i - 1];
 }
-aclTensor* acl_input_roll_mul_scale_tensor = create_acl_tensor(
-input_roll_mul_scale_buffer, type_mapping(src0->type),
+aclTensor* acl_input_roll_mul_scale_tensor = ggml_cann_create_tensor(
+input_roll_mul_scale_buffer, ggml_cann_type_mapping(src0->type),
+ggml_type_size(src0->type), src0->ne, input_nb, GGML_MAX_DIMS);
+aclTensor* acl_input_roll_reshape_tensor = ggml_cann_create_tensor(
+input_roll_buffer, ggml_cann_type_mapping(src0->type),
 ggml_type_size(src0->type), src0->ne, input_nb, GGML_MAX_DIMS);
-aclTensor* acl_input_roll_reshape_tensor = create_acl_tensor(
-input_roll_buffer, type_mapping(src0->type), ggml_type_size(src0->type),
-src0->ne, input_nb, GGML_MAX_DIMS);
 
-aclnn_mul(ctx, acl_input_roll_reshape_tensor,
-acl_minus_one_tensor, acl_input_roll_mul_scale_tensor);
+aclnn_mul(ctx, acl_input_roll_reshape_tensor, acl_minus_one_tensor,
+acl_input_roll_mul_scale_tensor);
 
 // output
-aclTensor* acl_src0 = create_acl_tensor(src0);
-aclTensor* acl_dst = create_acl_tensor(dst);
+aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
+aclTensor* acl_dst = ggml_cann_create_tensor(dst);
 void* output_fp32_buffer;
 if (src0->type == GGML_TYPE_F32) {
 aclnn_inplace_mul(ctx, acl_src0, acl_cos_reshape_tensor);
@@ -2887,26 +2906,25 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 ggml_cann_pool_alloc fp32_allocator1(
 ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
 void* input_fp32_buffer1 = fp32_allocator1.get();
-aclTensor* input_fp32_tensor1 =
-create_acl_tensor(input_fp32_buffer1, ACL_FLOAT, sizeof(float_t),
-dst->ne, input_fp32_nb, GGML_MAX_DIMS);
+aclTensor* input_fp32_tensor1 = ggml_cann_create_tensor(
+input_fp32_buffer1, ACL_FLOAT, sizeof(float_t), dst->ne,
+input_fp32_nb, GGML_MAX_DIMS);
 ggml_cann_pool_alloc fp32_allocator2(
 ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
 void* input_fp32_buffer2 = fp32_allocator2.get();
-aclTensor* input_fp32_tensor2 =
-create_acl_tensor(input_fp32_buffer2, ACL_FLOAT, sizeof(float_t),
-dst->ne, input_fp32_nb, GGML_MAX_DIMS);
+aclTensor* input_fp32_tensor2 = ggml_cann_create_tensor(
+input_fp32_buffer2, ACL_FLOAT, sizeof(float_t), dst->ne,
+input_fp32_nb, GGML_MAX_DIMS);
 
 ggml_cann_pool_alloc fp32_allocator(
 ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
 output_fp32_buffer = fp32_allocator.get();
-aclTensor* output_fp32_tensor =
-create_acl_tensor(output_fp32_buffer, ACL_FLOAT, sizeof(float_t),
-dst->ne, input_fp32_nb, GGML_MAX_DIMS);
-aclnn_mul(ctx, acl_src0, acl_cos_reshape_tensor,
-input_fp32_tensor1);
-aclnn_mul(ctx, acl_input_roll_mul_scale_tensor,
-acl_sin_reshape_tensor, input_fp32_tensor2);
+aclTensor* output_fp32_tensor = ggml_cann_create_tensor(
+output_fp32_buffer, ACL_FLOAT, sizeof(float_t), dst->ne,
+input_fp32_nb, GGML_MAX_DIMS);
+aclnn_mul(ctx, acl_src0, acl_cos_reshape_tensor, input_fp32_tensor1);
+aclnn_mul(ctx, acl_input_roll_mul_scale_tensor, acl_sin_reshape_tensor,
+input_fp32_tensor2);
 aclnn_add(ctx, input_fp32_tensor1, input_fp32_tensor2,
 output_fp32_tensor);
 aclnn_cast(ctx, output_fp32_tensor, acl_dst, ACL_FLOAT16);
@@ -497,15 +497,15 @@ void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 aclTensor* acl_dst;
 
 // Need bcast
-if (!ggml_are_same_shape(src0, src1) && need_bcast(src0, src1)) {
+if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
 BCAST_SHAPE(src0, src1)
-acl_src0 = create_acl_tensor(src0, BCAST_PARAM(src0));
-acl_src1 = create_acl_tensor(src1, BCAST_PARAM(src1));
-acl_dst = create_acl_tensor(dst, BCAST_PARAM(src0));
+acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
+acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
+acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
 } else {
-acl_src0 = create_acl_tensor(src0);
-acl_src1 = create_acl_tensor(src1);
-acl_dst = create_acl_tensor(dst);
+acl_src0 = ggml_cann_create_tensor(src0);
+acl_src1 = ggml_cann_create_tensor(src1);
+acl_dst = ggml_cann_create_tensor(dst);
 }
 
 uint64_t workspaceSize = 0;
@@ -538,8 +538,8 @@ void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 GGML_ASSERT(src->type == GGML_TYPE_F32);
 GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
-aclTensor* acl_src = create_acl_tensor(src);
-aclTensor* acl_dst = create_acl_tensor(dst);
+aclTensor* acl_src = ggml_cann_create_tensor(src);
+aclTensor* acl_dst = ggml_cann_create_tensor(dst);
 
 uint64_t workspaceSize = 0;
 aclOpExecutor* executor;
@@ -569,8 +569,8 @@ void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 GGML_ASSERT(src->type == GGML_TYPE_F32);
 GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
-aclTensor* acl_src = create_acl_tensor(src);
-aclTensor* acl_dst = create_acl_tensor(dst);
+aclTensor* acl_src = ggml_cann_create_tensor(src);
+aclTensor* acl_dst = ggml_cann_create_tensor(dst);
 
 uint64_t workspaceSize = 0;
 aclOpExecutor* executor;
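For readers skimming the rename, a minimal sketch of what a call site looks like with the prefixed helpers. This is illustrative only: the wrapper function, the assumed include path, and the 1-D shape below are not lines from this commit; only the helper names and argument order mirror the call sites in the diff above.

// Sketch only: assumes the CANN SDK and the ggml-cann header that declares
// ggml_cann_type_mapping / ggml_cann_create_tensor are available.
#include "aclnn_ops.h"  // assumed include path

static void example_wrap_dst(ggml_tensor* dst) {
    // ggml type -> ACL dtype, formerly type_mapping().
    aclDataType dtype = ggml_cann_type_mapping(dst->type);

    // Wrap a whole ggml tensor, formerly create_acl_tensor(dst).
    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

    // Wrap a raw buffer with an explicit 1-D shape/stride, mirroring the
    // tmp_arange* call sites above.
    int64_t ne[] = {dst->ne[0]};
    size_t nb[] = {ggml_type_size(dst->type)};
    aclTensor* acl_view = ggml_cann_create_tensor(
        dst->data, dtype, ggml_type_size(dst->type), ne, nb,
        GGML_MAX_DIMS - 3, ACL_FORMAT_ND);

    ACL_CHECK(aclDestroyTensor(acl_view));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}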