Merge branch 'master' into gguf
commit 1da82c551f
44 changed files with 6393 additions and 2794 deletions

ggml.c (626 changed lines)
@@ -195,8 +195,8 @@ typedef void * thread_ret_t;
 #define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
 #define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
 #else
-inline static void* ggml_aligned_malloc(size_t size) {
-    void* aligned_memory = NULL;
+inline static void * ggml_aligned_malloc(size_t size) {
+    void * aligned_memory = NULL;
 #ifdef GGML_USE_METAL
     int result = posix_memalign(&aligned_memory, getpagesize(), size);
 #else
@@ -3810,7 +3810,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };

-static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
+static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");

 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -3882,7 +3882,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };

-static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
+static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");

 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

@@ -4109,7 +4109,7 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
     //
     // is enough, but just in case, adding the second part

-    return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]);
+    return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN);
 }

 size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
@@ -4252,7 +4252,7 @@ static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
         tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
 }

-static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
+bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

     return
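For context on the ggml_nbytes change above: GGML_PAD rounds the byte size up to the next multiple of GGML_MEM_ALIGN. A minimal sketch of that rounding rule (the macro name below is hypothetical; the exact definition lives in ggml.h):

    // round x up to the next multiple of n (assumes n > 0)
    #define PAD_UP(x, n) (((x) + (n) - 1) / (n) * (n))
    // e.g. PAD_UP(1000, 16) == 1008, so every tensor buffer size comes out 16-byte aligned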
@@ -4556,10 +4556,12 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml

 static struct ggml_tensor * ggml_new_tensor_impl(
         struct ggml_context * ctx,
         enum ggml_type type,
         int n_dims,
-        const int64_t* ne,
-        void* data) {
+        const int64_t * ne,
+        void * data) {

+    assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
+
     size_t data_size = 0;

@@ -4599,7 +4601,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.ne =*/ { 1, 1, 1, 1 },
         /*.nb =*/ { 0, 0, 0, 0 },
         /*.op =*/ GGML_OP_NONE,
-        /*.op_params =*/ {0},
+        /*.op_params =*/ { 0 },
         /*.is_param =*/ false,
         /*.grad =*/ NULL,
         /*.src =*/ { NULL },
@@ -4631,6 +4633,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
 }

 static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
+    GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
     assert(params_size <= GGML_MAX_OP_PARAMS);
     memcpy(tensor->op_params, params, params_size);
 }
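The ggml_set_op_params helper above underpins most of the changes in this diff: small, trivially-copyable operator parameters are memcpy'd into a fixed-size buffer on the tensor and read back later by the compute kernels. A self-contained sketch of the pattern (toy types, not ggml's; sizes are illustrative):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define TOY_MAX_OP_PARAMS 32   // stands in for GGML_MAX_OP_PARAMS

    struct toy_tensor {
        int32_t op_params[TOY_MAX_OP_PARAMS / sizeof(int32_t)];
    };

    static void toy_set_op_params(struct toy_tensor * t, const void * params, size_t size) {
        assert(size <= sizeof(t->op_params));   // parameters must fit the fixed buffer
        memcpy(t->op_params, params, size);
    }

    int main(void) {
        struct toy_tensor t = {0};
        int32_t params[] = { 3, 1, 2, 0 };             // e.g. permutation axes
        toy_set_op_params(&t, params, sizeof(params)); // the array decays to a pointer, no '&' needed
        return t.op_params[0] == 3 ? 0 : 1;
    }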
@@ -4647,22 +4650,22 @@ static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int3

 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum ggml_type type,
         int n_dims,
         const int64_t * ne) {
     return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
 }

 struct ggml_tensor * ggml_new_tensor_1d(
         struct ggml_context * ctx,
         enum ggml_type type,
         int64_t ne0) {
     return ggml_new_tensor(ctx, type, 1, &ne0);
 }

 struct ggml_tensor * ggml_new_tensor_2d(
         struct ggml_context * ctx,
         enum ggml_type type,
         int64_t ne0,
         int64_t ne1) {
     const int64_t ne[2] = { ne0, ne1 };
@@ -4671,7 +4674,7 @@ struct ggml_tensor * ggml_new_tensor_2d(

 struct ggml_tensor * ggml_new_tensor_3d(
         struct ggml_context * ctx,
         enum ggml_type type,
         int64_t ne0,
         int64_t ne1,
         int64_t ne2) {
@@ -4981,11 +4984,6 @@ enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
     return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
 }

-static void ggml_set_unary_op(struct ggml_tensor * tensor, enum ggml_unary_op op) {
-    GGML_ASSERT(tensor->op = GGML_OP_UNARY);
-    ggml_set_op_params_i32(tensor, 0, (int32_t) op);
-}
-
 const char * ggml_get_name(const struct ggml_tensor * tensor) {
     return tensor->name;
 }
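One detail worth flagging in the helper removed above: its assertion used '=' (assignment) rather than '==' (comparison), so it could never fail; call sites now use ggml_set_op_params_i32 directly, which drops the check entirely:

    // GGML_ASSERT(tensor->op = GGML_OP_UNARY);   // assigns, then tests non-zero: always passes
    // GGML_ASSERT(tensor->op == GGML_OP_UNARY);  // the comparison that was presumably intended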
@@ -6242,6 +6240,27 @@ struct ggml_tensor * ggml_reshape_4d(

 // ggml_view_1d

+static struct ggml_tensor * ggml_view_tensor_offset(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int n_dims,
+        const int64_t * ne,
+        size_t offset) {
+    // don't calculate an offset from an unallocated tensor
+    void * data = NULL;
+    if (a->data != NULL) {
+        data = (char *) a->data + offset;
+    }
+
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, data);
+
+    ggml_format_name(result, "%s (view)", a->name);
+
+    ggml_set_op_params(result, &offset, sizeof(offset));
+
+    return result;
+}
+
 struct ggml_tensor * ggml_view_1d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
@@ -6254,10 +6273,7 @@ struct ggml_tensor * ggml_view_1d(
         is_node = true;
     }

-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
-    ggml_format_name(result, "%s (view)", a->name);
-
-    ggml_set_op_params(result, &offset, sizeof(offset));
+    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 1, &ne0, offset);

     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6284,10 +6300,7 @@ struct ggml_tensor * ggml_view_2d(

     const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };

-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
-    ggml_format_name(result, "%s (view)", a->name);
-
-    ggml_set_op_params(result, &offset, sizeof(offset));
+    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 2, ne, offset);

     result->nb[1] = nb1;
     result->nb[2] = result->nb[1]*ne1;
@@ -6320,10 +6333,7 @@ struct ggml_tensor * ggml_view_3d(

     const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };

-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
-    ggml_format_name(result, "%s (view)", a->name);
-
-    ggml_set_op_params(result, &offset, sizeof(offset));
+    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 3, ne, offset);

     result->nb[1] = nb1;
     result->nb[2] = nb2;
@@ -6358,10 +6368,7 @@ struct ggml_tensor * ggml_view_4d(

     const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };

-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset);
-    ggml_format_name(result, "%s (view)", a->name);
-
-    ggml_set_op_params(result, &offset, sizeof(offset));
+    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 4, ne, offset);

     result->nb[1] = nb1;
     result->nb[2] = nb2;
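A usage sketch for the view helper factored out above: a view aliases the parent tensor's buffer (no copy) and records the byte offset in op_params. The context size and tensor shapes below are illustrative:

    #include "ggml.h"

    void view_example(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

        // last 4 elements of a, starting at byte offset 4*sizeof(float)
        struct ggml_tensor * v = ggml_view_1d(ctx, a, 4, 4*sizeof(float));

        // v->data points into a->data; writing through the view writes into a
        (void) v;

        ggml_free(ctx);
    }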
@@ -6432,7 +6439,7 @@ struct ggml_tensor * ggml_permute(
     result->src[0] = a;

     int32_t params[] = { axis0, axis1, axis2, axis3 };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     return result;
 }
@@ -6558,7 +6565,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

     int32_t params[] = { n_past, inplace ? 1 : 0 };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_DIAG_MASK_INF;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6598,7 +6605,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

     int32_t params[] = { n_past, inplace ? 1 : 0 };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_DIAG_MASK_ZERO;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6714,9 +6721,9 @@ static struct ggml_tensor * ggml_rope_impl(
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

     int32_t params[6] = { n_past, n_dims, mode, n_ctx };
     memcpy(params + 4, &freq_base, sizeof(float));
     memcpy(params + 5, &freq_scale, sizeof(float));
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_ROPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6745,6 +6752,18 @@ struct ggml_tensor * ggml_rope_inplace(
     return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, true);
 }

+struct ggml_tensor * ggml_rope_custom(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int n_past,
+        int n_dims,
+        int mode,
+        int n_ctx,
+        float freq_base,
+        float freq_scale) {
+    return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, false);
+}
+
 struct ggml_tensor * ggml_rope_custom_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
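A usage sketch for the ggml_rope_custom entry point added above, which exposes the RoPE frequency base and scale (useful for extended-context setups; the concrete values below are illustrative):

    // n_rot and mode follow the usual ggml_rope conventions; 0 is the default rotary mode
    struct ggml_tensor * rope_example(struct ggml_context * ctx, struct ggml_tensor * cur,
                                      int n_past, int n_rot, int n_ctx) {
        const float freq_base  = 10000.0f; // default base
        const float freq_scale = 0.5f;     // linear position scaling, e.g. for 2x context
        return ggml_rope_custom(ctx, cur, n_past, n_rot, /*mode =*/ 0, n_ctx, freq_base, freq_scale);
    }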
@@ -6778,7 +6797,7 @@ struct ggml_tensor * ggml_rope_back(
     struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

     int32_t params[] = { n_past, n_dims, mode, n_ctx };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_ROPE_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6809,7 +6828,7 @@ struct ggml_tensor * ggml_alibi(

     int32_t op_params[3] = { n_past, n_head };
     memcpy(op_params + 2, &bias_max, sizeof(float));
-    ggml_set_op_params(result, &op_params, sizeof(op_params));
+    ggml_set_op_params(result, op_params, sizeof(op_params));

     result->op = GGML_OP_ALIBI;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6836,7 +6855,7 @@ struct ggml_tensor * ggml_clamp(
     struct ggml_tensor * result = ggml_view_tensor(ctx, a);

     float params[] = { min, max };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_CLAMP;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6871,10 +6890,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
         a->ne[2], 1, 1,
     };
-    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);

     int32_t params[] = { s0, p0, d0 };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_CONV_1D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
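The conv hunks above size their outputs with ggml_calc_conv_output_size. Assuming it follows the standard dilated-convolution arithmetic (the helper's body is not part of this diff), the rule is:

    #include <stdint.h>

    // output length for input size ins, kernel ks, stride s, padding p, dilation d
    static int64_t conv_out_size(int64_t ins, int64_t ks, int s, int p, int d) {
        return (ins + 2*p - d*(ks - 1) - 1)/s + 1;
    }
    // e.g. conv_out_size(32, 3, /*s*/1, /*p*/1, /*d*/1) == 32  (same-size convolution)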
@@ -6886,10 +6905,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(

 // ggml_conv_2d

-struct ggml_tensor* ggml_conv_2d(
-    struct ggml_context* ctx,
+struct ggml_tensor * ggml_conv_2d(
+    struct ggml_context * ctx,
     struct ggml_tensor * a,
     struct ggml_tensor * b,
     int s0,
     int s1,
     int p0,
@@ -6910,10 +6929,10 @@ struct ggml_tensor* ggml_conv_2d(
         ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
         a->ne[3], b->ne[3],
     };
-    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

     int32_t params[] = { s0, s1, p0, p1, d0, d1 };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_CONV_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6926,7 +6945,7 @@ struct ggml_tensor* ggml_conv_2d(

 // ggml_conv_1d_ph

-struct ggml_tensor* ggml_conv_1d_ph(
+struct ggml_tensor * ggml_conv_1d_ph(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -6944,7 +6963,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {

 // ggml_pool_1d

-struct ggml_tensor* ggml_pool_1d(
+struct ggml_tensor * ggml_pool_1d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         enum ggml_op_pool op,
@@ -6963,10 +6982,10 @@ struct ggml_tensor* ggml_pool_1d(
         ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
         a->ne[1],
     };
-    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);

     int32_t params[] = { op, k0, s0, p0 };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_POOL_1D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
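Similarly, the pooling hunks size their outputs with ggml_calc_pool_output_size; assuming the standard pooling arithmetic (again, the helper's body is not shown in this diff), the rule is:

    #include <stdint.h>

    // output length for input size ins, kernel ks, stride s, padding p
    static int64_t pool_out_size(int64_t ins, int ks, int s, int p) {
        return (ins + 2*p - ks)/s + 1;
    }
    // e.g. pool_out_size(10, /*ks*/2, /*s*/2, /*p*/0) == 5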
@@ -6977,7 +6996,7 @@ struct ggml_tensor* ggml_pool_1d(

 // ggml_pool_2d

-struct ggml_tensor* ggml_pool_2d(
+struct ggml_tensor * ggml_pool_2d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         enum ggml_op_pool op,
@@ -7000,10 +7019,10 @@ struct ggml_tensor* ggml_pool_2d(
         ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
         a->ne[2],
     };
-    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

     int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_POOL_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7171,7 +7190,7 @@ struct ggml_tensor * ggml_win_part(
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

     int32_t params[] = { npx, npy, w };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_WIN_PART;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7201,7 +7220,7 @@ struct ggml_tensor * ggml_win_unpart(
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

     int32_t params[] = { w };
-    ggml_set_op_params(result, &params, sizeof(params));
+    ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_WIN_UNPART;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7225,7 +7244,7 @@ static struct ggml_tensor * ggml_unary_impl(

     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

-    ggml_set_unary_op(result, op);
+    ggml_set_op_params_i32(result, 0, (int32_t) op);

     result->op = GGML_OP_UNARY;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7330,7 +7349,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
     return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
 }

-// ggml_map_custom1
+// ggml_map_custom1_f32

 static struct ggml_tensor * ggml_map_custom1_impl_f32(
         struct ggml_context * ctx,
@@ -7347,7 +7366,7 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32(

     ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

-    result->op = GGML_OP_MAP_CUSTOM1;
+    result->op = GGML_OP_MAP_CUSTOM1_F32;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;

@@ -7368,7 +7387,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
     return ggml_map_custom1_impl_f32(ctx, a, fun, true);
 }

-// ggml_map_custom2
+// ggml_map_custom2_f32

 static struct ggml_tensor * ggml_map_custom2_impl_f32(
         struct ggml_context * ctx,
@@ -7386,7 +7405,7 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32(

     ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

-    result->op = GGML_OP_MAP_CUSTOM2;
+    result->op = GGML_OP_MAP_CUSTOM2_F32;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
@@ -7410,7 +7429,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
     return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
 }

-// ggml_map_custom3
+// ggml_map_custom3_f32

 static struct ggml_tensor * ggml_map_custom3_impl_f32(
         struct ggml_context * ctx,
@@ -7429,7 +7448,7 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32(

     ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

-    result->op = GGML_OP_MAP_CUSTOM3;
+    result->op = GGML_OP_MAP_CUSTOM3_F32;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
@ -7456,6 +7475,190 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
|||
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom1
|
||||
struct ggml_map_custom1_op_params {
|
||||
ggml_custom1_op_t fun;
|
||||
int n_tasks;
|
||||
void * userdata;
|
||||
};
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom1_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata,
|
||||
bool inplace) {
|
||||
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
|
||||
|
||||
bool is_node = false;
|
||||
|
||||
if (!inplace && a->grad) {
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
struct ggml_map_custom1_op_params params = {
|
||||
/*.fun =*/ fun,
|
||||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, (const void *) ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM1;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom1(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom1_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom2
|
||||
|
||||
struct ggml_map_custom2_op_params {
|
||||
ggml_custom2_op_t fun;
|
||||
int n_tasks;
|
||||
void * userdata;
|
||||
};
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom2_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata,
|
||||
bool inplace) {
|
||||
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
|
||||
|
||||
bool is_node = false;
|
||||
|
||||
if (!inplace && (a->grad || b->grad)) {
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
struct ggml_map_custom2_op_params params = {
|
||||
/*.fun =*/ fun,
|
||||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, (const void *) ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM2;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom2(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom2_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom3
|
||||
|
||||
struct ggml_map_custom3_op_params {
|
||||
ggml_custom3_op_t fun;
|
||||
int n_tasks;
|
||||
void * userdata;
|
||||
};
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom3_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata,
|
||||
bool inplace) {
|
||||
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
|
||||
|
||||
bool is_node = false;
|
||||
|
||||
if (!inplace && (a->grad || b->grad || c->grad)) {
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
struct ggml_map_custom3_op_params params = {
|
||||
/*.fun =*/ fun,
|
||||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, (const void *) ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM3;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
result->src[2] = c;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom3(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom3_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ggml_cross_entropy_loss
|
||||
|
||||
struct ggml_tensor * ggml_cross_entropy_loss(
|
||||
|
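A usage sketch for the thread-aware ggml_map_custom1 API added in the large hunk above: the callback receives the thread index and thread count plus a userdata pointer, and GGML_N_TASKS_MAX asks the scheduler to use every available thread. The kernel below (row-wise scaling) is illustrative:

    #include "ggml.h"

    static void scale_rows(struct ggml_tensor * dst, const struct ggml_tensor * a,
                           int ith, int nth, void * userdata) {
        const float factor = *(const float *) userdata;
        const int64_t nr = ggml_nrows(a);
        // each thread handles an interleaved subset of rows
        for (int64_t r = ith; r < nr; r += nth) {
            const float * src = (const float *) ((const char *) a->data + r*a->nb[1]);
            float * out = (float *) ((char *) dst->data + r*dst->nb[1]);
            for (int64_t c = 0; c < a->ne[0]; ++c) {
                out[c] = factor*src[c];
            }
        }
    }

    // graph construction (factor must stay alive until the graph is computed):
    // struct ggml_tensor * y = ggml_map_custom1(ctx, x, scale_rows, GGML_N_TASKS_MAX, &factor);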
@@ -9264,8 +9467,8 @@ static void ggml_compute_forward_sum_rows_f32(
     for (int64_t i3 = 0; i3 < ne03; i3++) {
         for (int64_t i2 = 0; i2 < ne02; i2++) {
             for (int64_t i1 = 0; i1 < ne01; i1++) {
-                float* src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
-                float* dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
+                float * src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
+                float * dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
                 float row_sum = 0;
                 ggml_vec_sum_f32(ne00, &row_sum, src_row);
                 dst_row[0] = row_sum;
@ -10527,72 +10730,96 @@ static void ggml_compute_forward_mul_mat(
|
|||
return;
|
||||
}
|
||||
|
||||
// parallelize by src0 rows
|
||||
const int64_t dr = (ne01 + nth - 1)/nth;
|
||||
|
||||
const int64_t ir10 = dr*ith;
|
||||
const int64_t ir11 = MIN(ir10 + dr, ne01);
|
||||
|
||||
// src1 rows
|
||||
const int64_t nr1 = ne11*ne12*ne13;
|
||||
|
||||
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
|
||||
const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];
|
||||
|
||||
for (int64_t ir1 = 0; ir1 < nr1; ++ir1) {
|
||||
const int64_t i13 = (ir1/(ne12*ne11));
|
||||
const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
|
||||
const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
|
||||
const int64_t nr0 = ne01; // src0 rows
|
||||
const int64_t nr1 = ne11*ne12*ne13; // src1 rows
|
||||
|
||||
const int64_t ir0 = (ir1/ne11)%(ne02*ne03);
|
||||
const int64_t i03 = (ir0/(ne02));
|
||||
// Hack for "Falcon multi-query-attention key stutter" / alternative to ggml_repeat2.
|
||||
// See https://github.com/ggerganov/llama.cpp/issues/1602#issuecomment-1606087470:
|
||||
// GG: this is likely the correct way to broadcast, though need some more thought
|
||||
// therefore leaving the comments to remind us for now
|
||||
const int64_t i02 = (i12 / (ne12 / ne02));
|
||||
// Original from PR/224 (and also essential/correct for non-broadcast matmuls in Falcon)
|
||||
// const int64_t i02 = (ir0 - i03*ne02);
|
||||
//printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);
|
||||
|
||||
const int64_t i1 = i11;
|
||||
const int64_t i2 = i12;
|
||||
const int64_t i3 = i13;
|
||||
// distribute the thread work across the inner or outer loop based on which one is larger
|
||||
|
||||
const char * src0_row = (const char *) src0->data + ( 0 + i02*nb02 + i03*nb03 );
|
||||
const int64_t nth0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
|
||||
const int64_t nth1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows
|
||||
|
||||
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
|
||||
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
|
||||
// the original src1 data pointer, so we should index using the indices directly
|
||||
// TODO: this is a bit of a hack, we should probably have a better way to handle this
|
||||
const char * src1_col = (const char *) wdata +
|
||||
(src1_cont || src1->type != vec_dot_type
|
||||
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
|
||||
: (i11*nb11 + i12*nb12 + i13*nb13));
|
||||
const int64_t ith0 = ith % nth0;
|
||||
const int64_t ith1 = ith / nth0;
|
||||
|
||||
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
|
||||
const int64_t dr0 = (nr0 + nth0 - 1)/nth0;
|
||||
const int64_t dr1 = (nr1 + nth1 - 1)/nth1;
|
||||
|
||||
for (int64_t ir = ir10; ir < ir11; ++ir) {
|
||||
vec_dot(ne00, &dst_col[ir], src0_row + ir*nb01, src1_col);
|
||||
}
|
||||
const int64_t ir010 = dr0*ith0;
|
||||
const int64_t ir011 = MIN(ir010 + dr0, nr0);
|
||||
|
||||
const int64_t ir110 = dr1*ith1;
|
||||
const int64_t ir111 = MIN(ir110 + dr1, nr1);
|
||||
|
||||
//printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", ir010, ir011, ir110, ir111);
|
||||
|
||||
// threads with no work simply yield (not sure if it helps)
|
||||
if (ir010 >= ir011 || ir110 >= ir111) {
|
||||
sched_yield();
|
||||
return;
|
||||
}
|
||||
|
||||
//int64_t t1 = ggml_time_us();
|
||||
//static int64_t acc = 0;
|
||||
//acc += t1 - t0;
|
||||
//if (t1 - t0 > 10) {
|
||||
// printf("\n");
|
||||
// printf("ne00 = %5d, ne01 = %5d, ne02 = %5d, ne03 = %5d\n", ne00, ne01, ne02, ne03);
|
||||
// printf("nb00 = %5d, nb01 = %5d, nb02 = %5d, nb03 = %5d\n", nb00, nb01, nb02, nb03);
|
||||
// printf("ne10 = %5d, ne11 = %5d, ne12 = %5d, ne13 = %5d\n", ne10, ne11, ne12, ne13);
|
||||
assert(ne12 % ne02 == 0);
|
||||
assert(ne13 % ne03 == 0);
|
||||
|
||||
// printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX task %d/%d: %d us, acc = %d\n", ith, nth, (int) (t1 - t0), (int) acc);
|
||||
//}
|
||||
// broadcast factors
|
||||
const int64_t r2 = ne12/ne02;
|
||||
const int64_t r3 = ne13/ne03;
|
||||
|
||||
// block-tiling attempt
|
||||
const int64_t blck_0 = 16;
|
||||
const int64_t blck_1 = 16;
|
||||
|
||||
// attempt to reduce false-sharing (does not seem to make a difference)
|
||||
float tmp[16];
|
||||
|
||||
for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
|
||||
for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
|
||||
for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
|
||||
const int64_t i13 = (ir1/(ne12*ne11));
|
||||
const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
|
||||
const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
|
||||
|
||||
// broadcast src0 into src1
|
||||
const int64_t i03 = i13/r3;
|
||||
const int64_t i02 = i12/r2;
|
||||
|
||||
const int64_t i1 = i11;
|
||||
const int64_t i2 = i12;
|
||||
const int64_t i3 = i13;
|
||||
|
||||
const char * src0_row = (const char *) src0->data + (0 + i02*nb02 + i03*nb03);
|
||||
|
||||
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
|
||||
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
|
||||
// the original src1 data pointer, so we should index using the indices directly
|
||||
// TODO: this is a bit of a hack, we should probably have a better way to handle this
|
||||
const char * src1_col = (const char *) wdata +
|
||||
(src1_cont || src1->type != vec_dot_type
|
||||
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
|
||||
: (i11*nb11 + i12*nb12 + i13*nb13));
|
||||
|
||||
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
|
||||
|
||||
//for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
|
||||
// vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
|
||||
//}
|
||||
|
||||
for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
|
||||
vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
|
||||
}
|
||||
memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ggml_compute_forward_out_prod
|
||||
|
||||
|
||||
static void ggml_compute_forward_out_prod_f32(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * src0,
|
||||
|
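The mul_mat hunk above replaces the single split over src0 rows with a two-dimensional split over (src0 rows, src1 rows), parallelizing whichever side is larger, and adds broadcasting of src0 across src1's higher dimensions plus 16x16 block tiling. A stand-alone sketch of the partitioning rule, mirroring the logic added above (not a verbatim excerpt):

    #include <stdint.h>

    #define MIN2(a, b) ((a) < (b) ? (a) : (b))

    // given nr0 = src0 rows and nr1 = src1 rows, thread ith of nth computes
    // dst rows in the range [ir010, ir011) x [ir110, ir111)
    static void partition(int64_t nr0, int64_t nr1, int ith, int nth,
                          int64_t * ir010, int64_t * ir011,
                          int64_t * ir110, int64_t * ir111) {
        const int64_t nth0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
        const int64_t nth1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows

        const int64_t ith0 = ith % nth0;
        const int64_t ith1 = ith / nth0;

        const int64_t dr0 = (nr0 + nth0 - 1)/nth0; // rows per thread, rounded up
        const int64_t dr1 = (nr1 + nth1 - 1)/nth1;

        *ir010 = dr0*ith0; *ir011 = MIN2(*ir010 + dr0, nr0);
        *ir110 = dr1*ith1; *ir111 = MIN2(*ir110 + dr1, nr1);
    }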
@@ -12875,7 +13102,7 @@ static void ggml_compute_forward_pool_1d(
         const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {

-    const int32_t* opts = (const int32_t*)dst->op_params;
+    const int32_t * opts = (const int32_t *)dst->op_params;
     enum ggml_op_pool op = opts[0];
     const int k0 = opts[1];
     const int s0 = opts[2];
@@ -14208,24 +14435,6 @@ static void ggml_compute_forward_map_custom1_f32(
     fun(dst, a);
 }

-
-static void ggml_compute_forward_map_custom1(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        struct ggml_tensor * dst,
-        const ggml_custom1_op_f32_t fun) {
-    switch (a->type) {
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
 // ggml_compute_forward_map_custom2

 static void ggml_compute_forward_map_custom2_f32(
@@ -14244,24 +14453,6 @@ static void ggml_compute_forward_map_custom2_f32(
 }

-
-static void ggml_compute_forward_map_custom2(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b,
-        struct ggml_tensor * dst,
-        const ggml_custom2_op_f32_t fun) {
-    switch (a->type) {
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
 // ggml_compute_forward_map_custom3

 static void ggml_compute_forward_map_custom3_f32(
@ -14280,24 +14471,52 @@ static void ggml_compute_forward_map_custom3_f32(
|
|||
fun(dst, a, b, c);
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom1
|
||||
|
||||
static void ggml_compute_forward_map_custom1(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
struct ggml_tensor * dst) {
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
|
||||
|
||||
p->fun(dst, a, params->ith, params->nth, p->userdata);
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom2
|
||||
|
||||
static void ggml_compute_forward_map_custom2(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
const struct ggml_tensor * b,
|
||||
struct ggml_tensor * dst) {
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
|
||||
|
||||
p->fun(dst, a, b, params->ith, params->nth, p->userdata);
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom3
|
||||
|
||||
static void ggml_compute_forward_map_custom3(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
const struct ggml_tensor * b,
|
||||
const struct ggml_tensor * c,
|
||||
struct ggml_tensor * dst,
|
||||
const ggml_custom3_op_f32_t fun) {
|
||||
switch (a->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
struct ggml_tensor * dst) {
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
|
||||
|
||||
p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
|
||||
}
|
||||
|
||||
// ggml_compute_forward_cross_entropy_loss
|
||||
|
@ -14819,25 +15038,40 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|||
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
case GGML_OP_MAP_CUSTOM1_F32:
|
||||
{
|
||||
ggml_custom1_op_f32_t fun;
|
||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
|
||||
ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM2_F32:
|
||||
{
|
||||
ggml_custom2_op_f32_t fun;
|
||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||
ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM3_F32:
|
||||
{
|
||||
ggml_custom3_op_f32_t fun;
|
||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||
ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
{
|
||||
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM2:
|
||||
{
|
||||
ggml_custom2_op_f32_t fun;
|
||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
|
||||
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM3:
|
||||
{
|
||||
ggml_custom3_op_f32_t fun;
|
||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
||||
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_CROSS_ENTROPY_LOSS:
|
||||
|
@@ -15645,6 +15879,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_MAP_UNARY:
         case GGML_OP_MAP_BINARY:
+        case GGML_OP_MAP_CUSTOM1_F32:
+        case GGML_OP_MAP_CUSTOM2_F32:
+        case GGML_OP_MAP_CUSTOM3_F32:
         case GGML_OP_MAP_CUSTOM1:
         case GGML_OP_MAP_CUSTOM2:
        case GGML_OP_MAP_CUSTOM3:
@ -16430,12 +16667,39 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|||
case GGML_OP_WIN_UNPART:
|
||||
case GGML_OP_MAP_UNARY:
|
||||
case GGML_OP_MAP_BINARY:
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
case GGML_OP_MAP_CUSTOM2:
|
||||
case GGML_OP_MAP_CUSTOM3:
|
||||
case GGML_OP_MAP_CUSTOM1_F32:
|
||||
case GGML_OP_MAP_CUSTOM2_F32:
|
||||
case GGML_OP_MAP_CUSTOM3_F32:
|
||||
{
|
||||
n_tasks = 1;
|
||||
} break;
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
{
|
||||
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
|
||||
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
||||
n_tasks = n_threads;
|
||||
} else {
|
||||
n_tasks = MIN(p->n_tasks, n_threads);
|
||||
}
|
||||
} break;
|
||||
case GGML_OP_MAP_CUSTOM2:
|
||||
{
|
||||
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
|
||||
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
||||
n_tasks = n_threads;
|
||||
} else {
|
||||
n_tasks = MIN(p->n_tasks, n_threads);
|
||||
}
|
||||
} break;
|
||||
case GGML_OP_MAP_CUSTOM3:
|
||||
{
|
||||
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
|
||||
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
||||
n_tasks = n_threads;
|
||||
} else {
|
||||
n_tasks = MIN(p->n_tasks, n_threads);
|
||||
}
|
||||
} break;
|
||||
case GGML_OP_CROSS_ENTROPY_LOSS:
|
||||
{
|
||||
n_tasks = n_threads;