Merge branch 'master' into concedo_experimental
Commit 28ad80b6e4
2 changed files with 508 additions and 181 deletions

ggml.c (544 changed lines)
@@ -195,8 +195,8 @@ typedef void * thread_ret_t;
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
inline static void* ggml_aligned_malloc(size_t size) {
void* aligned_memory = NULL;
inline static void * ggml_aligned_malloc(size_t size) {
void * aligned_memory = NULL;
#ifdef GGML_USE_METAL
int result = posix_memalign(&aligned_memory, getpagesize(), size);
#else
@@ -3812,7 +3812,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"CROSS_ENTROPY_LOSS_BACK",
};

static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");

static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@@ -3884,7 +3884,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"cross_entropy_loss_back(x,y)",
};

static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");

static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@@ -4111,7 +4111,7 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
//
// is enough, but just in case, adding the second part

return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]);
return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN);
}

size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
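The new return value wraps the old expression in GGML_PAD(..., GGML_MEM_ALIGN), i.e. the byte count is rounded up to a multiple of GGML_MEM_ALIGN. A minimal sketch of that rounding, assuming the usual power-of-two alignment (the helper name round_up_pad is illustrative, not part of ggml):

#include <assert.h>
#include <stddef.h>

// Round `size` up to the next multiple of `align` (align must be a power of two).
// This is the effect of wrapping the old MAX(...) expression in GGML_PAD(..., GGML_MEM_ALIGN).
static size_t round_up_pad(size_t size, size_t align) {
    assert(align != 0 && (align & (align - 1)) == 0);
    return (size + align - 1) & ~(align - 1);
}

// Example: with 16-byte alignment, 30 bytes pad to 32 and 32 stays 32:
// round_up_pad(30, 16) == 32, round_up_pad(32, 16) == 32.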
@@ -4254,7 +4254,7 @@ static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
}

static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

return
@@ -4603,7 +4603,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
/*.ne =*/ { 1, 1, 1, 1 },
/*.nb =*/ { 0, 0, 0, 0 },
/*.op =*/ GGML_OP_NONE,
/*.op_params =*/ {0},
/*.op_params =*/ { 0 },
/*.is_param =*/ false,
/*.grad =*/ NULL,
/*.src =*/ { NULL },
@@ -4635,6 +4635,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
}

static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
assert(params_size <= GGML_MAX_OP_PARAMS);
memcpy(tensor->op_params, params, params_size);
}
@@ -6440,7 +6441,7 @@ struct ggml_tensor * ggml_permute(
result->src[0] = a;

int32_t params[] = { axis0, axis1, axis2, axis3 };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

return result;
}
@@ -6566,7 +6567,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

int32_t params[] = { n_past, inplace ? 1 : 0 };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_DIAG_MASK_INF;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6606,7 +6607,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

int32_t params[] = { n_past, inplace ? 1 : 0 };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_DIAG_MASK_ZERO;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6722,9 +6723,9 @@ static struct ggml_tensor * ggml_rope_impl(
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

int32_t params[6] = { n_past, n_dims, mode, n_ctx };
memcpy(params + 4, &freq_base, sizeof(float));
memcpy(params + 5, &freq_scale, sizeof(float));
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_ROPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6798,7 +6799,7 @@ struct ggml_tensor * ggml_rope_back(
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

int32_t params[] = { n_past, n_dims, mode, n_ctx };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_ROPE_BACK;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6829,7 +6830,7 @@ struct ggml_tensor * ggml_alibi(

int32_t op_params[3] = { n_past, n_head };
memcpy(op_params + 2, &bias_max, sizeof(float));
ggml_set_op_params(result, &op_params, sizeof(op_params));
ggml_set_op_params(result, op_params, sizeof(op_params));

result->op = GGML_OP_ALIBI;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6856,7 +6857,7 @@ struct ggml_tensor * ggml_clamp(
struct ggml_tensor * result = ggml_view_tensor(ctx, a);

float params[] = { min, max };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_CLAMP;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6891,10 +6892,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
a->ne[2], 1, 1,
};
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);

int32_t params[] = { s0, p0, d0 };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_CONV_1D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6906,10 +6907,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(

// ggml_conv_2d

struct ggml_tensor* ggml_conv_2d(
struct ggml_context* ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * ggml_conv_2d(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
int s0,
int s1,
int p0,
@@ -6930,10 +6931,10 @@ struct ggml_tensor* ggml_conv_2d(
ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
a->ne[3], b->ne[3],
};
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

int32_t params[] = { s0, s1, p0, p1, d0, d1 };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_CONV_2D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6946,7 +6947,7 @@ struct ggml_tensor* ggml_conv_2d(

// ggml_conv_1d_ph

struct ggml_tensor* ggml_conv_1d_ph(
struct ggml_tensor * ggml_conv_1d_ph(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -6964,7 +6965,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {

// ggml_pool_1d

struct ggml_tensor* ggml_pool_1d(
struct ggml_tensor * ggml_pool_1d(
struct ggml_context * ctx,
struct ggml_tensor * a,
enum ggml_op_pool op,
@@ -6983,10 +6984,10 @@ struct ggml_tensor* ggml_pool_1d(
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
a->ne[1],
};
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);

int32_t params[] = { op, k0, s0, p0 };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_POOL_1D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6997,7 +6998,7 @@ struct ggml_tensor* ggml_pool_1d(

// ggml_pool_2d

struct ggml_tensor* ggml_pool_2d(
struct ggml_tensor * ggml_pool_2d(
struct ggml_context * ctx,
struct ggml_tensor * a,
enum ggml_op_pool op,
@@ -7020,10 +7021,10 @@ struct ggml_tensor* ggml_pool_2d(
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
a->ne[2],
};
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_POOL_2D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7191,7 +7192,7 @@ struct ggml_tensor * ggml_win_part(
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

int32_t params[] = { npx, npy, w };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_WIN_PART;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7221,7 +7222,7 @@ struct ggml_tensor * ggml_win_unpart(
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

int32_t params[] = { w };
ggml_set_op_params(result, &params, sizeof(params));
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_WIN_UNPART;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7350,7 +7351,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
}

// ggml_map_custom1
// ggml_map_custom1_f32

static struct ggml_tensor * ggml_map_custom1_impl_f32(
struct ggml_context * ctx,
@@ -7367,7 +7368,7 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32(

ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

result->op = GGML_OP_MAP_CUSTOM1;
result->op = GGML_OP_MAP_CUSTOM1_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
@@ -7388,7 +7389,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
return ggml_map_custom1_impl_f32(ctx, a, fun, true);
}

// ggml_map_custom2
// ggml_map_custom2_f32

static struct ggml_tensor * ggml_map_custom2_impl_f32(
struct ggml_context * ctx,
@@ -7406,7 +7407,7 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32(

ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

result->op = GGML_OP_MAP_CUSTOM2;
result->op = GGML_OP_MAP_CUSTOM2_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
@@ -7430,7 +7431,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
}

// ggml_map_custom3
// ggml_map_custom3_f32

static struct ggml_tensor * ggml_map_custom3_impl_f32(
struct ggml_context * ctx,
@@ -7449,7 +7450,7 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32(

ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

result->op = GGML_OP_MAP_CUSTOM3;
result->op = GGML_OP_MAP_CUSTOM3_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
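Across the hunks above, each operator now packs its parameters into the tensor's fixed op_params buffer via ggml_set_op_params, and the call sites drop the redundant address-of on the local array (for a local array, params and &params point at the same bytes, so this is a cosmetic cleanup). As a hedged illustration of the packing convention used by the GGML_OP_ROPE change above, mixing int32_t fields with floats stored in the same 32-bit slots (the helper names are illustrative, not part of ggml):

#include <stdint.h>
#include <string.h>

// Illustrative writer/reader pair for the rope-style parameter block shown above.
// Assumes sizeof(float) == sizeof(int32_t), which is what the memcpy into params + 4/5 relies on.
static void pack_rope_params(int32_t params[6], int32_t n_past, int32_t n_dims, int32_t mode, int32_t n_ctx,
                             float freq_base, float freq_scale) {
    params[0] = n_past; params[1] = n_dims; params[2] = mode; params[3] = n_ctx;
    memcpy(params + 4, &freq_base,  sizeof(float));
    memcpy(params + 5, &freq_scale, sizeof(float));
}

static void unpack_rope_freqs(const int32_t params[6], float * freq_base, float * freq_scale) {
    memcpy(freq_base,  params + 4, sizeof(float));
    memcpy(freq_scale, params + 5, sizeof(float));
}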
@@ -7476,6 +7477,190 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32(
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
}

// ggml_map_custom1
struct ggml_map_custom1_op_params {
ggml_custom1_op_t fun;
int n_tasks;
void * userdata;
};

static struct ggml_tensor * ggml_map_custom1_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_t fun,
int n_tasks,
void * userdata,
bool inplace) {
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);

bool is_node = false;

if (!inplace && a->grad) {
is_node = true;
}

struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

struct ggml_map_custom1_op_params params = {
/*.fun =*/ fun,
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
ggml_set_op_params(result, (const void *) &params, sizeof(params));

result->op = GGML_OP_MAP_CUSTOM1;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;

return result;
}

struct ggml_tensor * ggml_map_custom1(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
}

struct ggml_tensor * ggml_map_custom1_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
}

// ggml_map_custom2

struct ggml_map_custom2_op_params {
ggml_custom2_op_t fun;
int n_tasks;
void * userdata;
};

static struct ggml_tensor * ggml_map_custom2_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_t fun,
int n_tasks,
void * userdata,
bool inplace) {
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);

bool is_node = false;

if (!inplace && (a->grad || b->grad)) {
is_node = true;
}

struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

struct ggml_map_custom2_op_params params = {
/*.fun =*/ fun,
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
ggml_set_op_params(result, (const void *) &params, sizeof(params));

result->op = GGML_OP_MAP_CUSTOM2;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;

return result;
}

struct ggml_tensor * ggml_map_custom2(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
}

struct ggml_tensor * ggml_map_custom2_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
}

// ggml_map_custom3

struct ggml_map_custom3_op_params {
ggml_custom3_op_t fun;
int n_tasks;
void * userdata;
};

static struct ggml_tensor * ggml_map_custom3_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_t fun,
int n_tasks,
void * userdata,
bool inplace) {
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);

bool is_node = false;

if (!inplace && (a->grad || b->grad || c->grad)) {
is_node = true;
}

struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

struct ggml_map_custom3_op_params params = {
/*.fun =*/ fun,
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
ggml_set_op_params(result, (const void *) &params, sizeof(params));

result->op = GGML_OP_MAP_CUSTOM3;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
result->src[2] = c;

return result;
}

struct ggml_tensor * ggml_map_custom3(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
}

struct ggml_tensor * ggml_map_custom3_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
}

// ggml_cross_entropy_loss

struct ggml_tensor * ggml_cross_entropy_loss(
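For orientation, a minimal usage sketch of the v2 custom operators added above: the callback receives its thread index ith out of nth threads and picks its own slice of the work, and userdata is passed through unchanged. The callback scale_rows and its factor payload are illustrative, not part of ggml:

#include "ggml.h"

// Illustrative ggml_custom1_op_t callback: scale every row of `a` by a factor passed via userdata.
// Each thread (ith of nth) handles an interleaved subset of the rows.
static void scale_rows(struct ggml_tensor * dst, const struct ggml_tensor * a,
                       int ith, int nth, void * userdata) {
    const float factor = *(const float *) userdata;
    const int64_t nrows = ggml_nrows(a);
    for (int64_t r = ith; r < nrows; r += nth) {
        const float * src_row = (const float *) ((const char *) a->data + r*a->nb[1]);
        float * dst_row = (float *) ((char *) dst->data + r*dst->nb[1]);
        for (int64_t i = 0; i < a->ne[0]; ++i) {
            dst_row[i] = factor*src_row[i];
        }
    }
}

// Usage (sketch): request as many threads as the scheduler allows with GGML_N_TASKS_MAX.
//   static float factor = 2.0f;
//   struct ggml_tensor * y = ggml_map_custom1(ctx, x, scale_rows, GGML_N_TASKS_MAX, &factor);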
@@ -9284,8 +9469,8 @@ static void ggml_compute_forward_sum_rows_f32(
for (int64_t i3 = 0; i3 < ne03; i3++) {
for (int64_t i2 = 0; i2 < ne02; i2++) {
for (int64_t i1 = 0; i1 < ne01; i1++) {
float* src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
float* dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
float * src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
float * dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
float row_sum = 0;
ggml_vec_sum_f32(ne00, &row_sum, src_row);
dst_row[0] = row_sum;
@@ -10542,72 +10727,96 @@ static void ggml_compute_forward_mul_mat(
return;
}

// parallelize by src0 rows
const int64_t dr = (ne01 + nth - 1)/nth;

const int64_t ir10 = dr*ith;
const int64_t ir11 = MIN(ir10 + dr, ne01);

// src1 rows
const int64_t nr1 = ne11*ne12*ne13;

const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];

for (int64_t ir1 = 0; ir1 < nr1; ++ir1) {
const int64_t i13 = (ir1/(ne12*ne11));
const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
const int64_t nr0 = ne01; // src0 rows
const int64_t nr1 = ne11*ne12*ne13; // src1 rows

const int64_t ir0 = (ir1/ne11)%(ne02*ne03);
const int64_t i03 = (ir0/(ne02));
// Hack for "Falcon multi-query-attention key stutter" / alternative to ggml_repeat2.
// See https://github.com/ggerganov/llama.cpp/issues/1602#issuecomment-1606087470:
// GG: this is likely the correct way to broadcast, though need some more thought
// therefore leaving the comments to remind us for now
const int64_t i02 = (i12 / (ne12 / ne02));
// Original from PR/224 (and also essential/correct for non-broadcast matmuls in Falcon)
// const int64_t i02 = (ir0 - i03*ne02);
//printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);

const int64_t i1 = i11;
const int64_t i2 = i12;
const int64_t i3 = i13;
// distribute the thread work across the inner or outer loop based on which one is larger

const char * src0_row = (const char *) src0->data + ( 0 + i02*nb02 + i03*nb03 );
const int64_t nth0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
const int64_t nth1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows

// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
// the original src1 data pointer, so we should index using the indices directly
// TODO: this is a bit of a hack, we should probably have a better way to handle this
const char * src1_col = (const char *) wdata +
(src1_cont || src1->type != vec_dot_type
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
: (i11*nb11 + i12*nb12 + i13*nb13));
const int64_t ith0 = ith % nth0;
const int64_t ith1 = ith / nth0;

float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
const int64_t dr0 = (nr0 + nth0 - 1)/nth0;
const int64_t dr1 = (nr1 + nth1 - 1)/nth1;

for (int64_t ir = ir10; ir < ir11; ++ir) {
vec_dot(ne00, &dst_col[ir], src0_row + ir*nb01, src1_col);
}
const int64_t ir010 = dr0*ith0;
const int64_t ir011 = MIN(ir010 + dr0, nr0);

const int64_t ir110 = dr1*ith1;
const int64_t ir111 = MIN(ir110 + dr1, nr1);

//printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", ir010, ir011, ir110, ir111);

// threads with no work simply yield (not sure if it helps)
if (ir010 >= ir011 || ir110 >= ir111) {
sched_yield();
return;
}

//int64_t t1 = ggml_time_us();
//static int64_t acc = 0;
//acc += t1 - t0;
//if (t1 - t0 > 10) {
// printf("\n");
// printf("ne00 = %5d, ne01 = %5d, ne02 = %5d, ne03 = %5d\n", ne00, ne01, ne02, ne03);
// printf("nb00 = %5d, nb01 = %5d, nb02 = %5d, nb03 = %5d\n", nb00, nb01, nb02, nb03);
// printf("ne10 = %5d, ne11 = %5d, ne12 = %5d, ne13 = %5d\n", ne10, ne11, ne12, ne13);
assert(ne12 % ne02 == 0);
assert(ne13 % ne03 == 0);

// printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX task %d/%d: %d us, acc = %d\n", ith, nth, (int) (t1 - t0), (int) acc);
//}
// broadcast factors
const int64_t r2 = ne12/ne02;
const int64_t r3 = ne13/ne03;

// block-tiling attempt
const int64_t blck_0 = 16;
const int64_t blck_1 = 16;

// attempt to reduce false-sharing (does not seem to make a difference)
float tmp[16];

for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
const int64_t i13 = (ir1/(ne12*ne11));
const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);

// broadcast src0 into src1
const int64_t i03 = i13/r3;
const int64_t i02 = i12/r2;

const int64_t i1 = i11;
const int64_t i2 = i12;
const int64_t i3 = i13;

const char * src0_row = (const char *) src0->data + (0 + i02*nb02 + i03*nb03);

// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
// the original src1 data pointer, so we should index using the indices directly
// TODO: this is a bit of a hack, we should probably have a better way to handle this
const char * src1_col = (const char *) wdata +
(src1_cont || src1->type != vec_dot_type
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
: (i11*nb11 + i12*nb12 + i13*nb13));

float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));

//for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
// vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
//}

for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
}
memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
}
}
}
}

// ggml_compute_forward_out_prod

static void ggml_compute_forward_out_prod_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
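A rough restatement of the new thread split introduced in the mul_mat hunk above, under the assumption that threads are numbered ith = 0..nth-1: the threads form an nth0 x nth1 grid, all of them go to whichever of the src0 row count (nr0) or the src1 row count (nr1) is larger, and each thread then owns the half-open ranges [ir010, ir011) and [ir110, ir111). The helper below only illustrates that arithmetic; it is not code from the diff:

#include <stdint.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

// Compute the src0 row range [*r00, *r01) and src1 row range [*r10, *r11) owned by thread `ith`.
static void mul_mat_thread_ranges(int64_t ith, int64_t nth, int64_t nr0, int64_t nr1,
                                  int64_t * r00, int64_t * r01, int64_t * r10, int64_t * r11) {
    const int64_t nth0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
    const int64_t nth1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows

    const int64_t ith0 = ith % nth0;
    const int64_t ith1 = ith / nth0;

    const int64_t dr0 = (nr0 + nth0 - 1)/nth0;
    const int64_t dr1 = (nr1 + nth1 - 1)/nth1;

    *r00 = dr0*ith0;
    *r01 = MIN(*r00 + dr0, nr0);
    *r10 = dr1*ith1;
    *r11 = MIN(*r10 + dr1, nr1);
}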
@@ -12890,7 +13099,7 @@ static void ggml_compute_forward_pool_1d(
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {

const int32_t* opts = (const int32_t*)dst->op_params;
const int32_t * opts = (const int32_t *)dst->op_params;
enum ggml_op_pool op = opts[0];
const int k0 = opts[1];
const int s0 = opts[2];
@@ -14223,24 +14432,6 @@ static void ggml_compute_forward_map_custom1_f32(
fun(dst, a);
}

static void ggml_compute_forward_map_custom1(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
struct ggml_tensor * dst,
const ggml_custom1_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}

// ggml_compute_forward_map_custom2

static void ggml_compute_forward_map_custom2_f32(
@@ -14259,24 +14450,6 @@ static void ggml_compute_forward_map_custom2_f32(
}

static void ggml_compute_forward_map_custom2(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
struct ggml_tensor * dst,
const ggml_custom2_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}

// ggml_compute_forward_map_custom3

static void ggml_compute_forward_map_custom3_f32(
@@ -14295,24 +14468,52 @@ static void ggml_compute_forward_map_custom3_f32(
fun(dst, a, b, c);
}

// ggml_compute_forward_map_custom1

static void ggml_compute_forward_map_custom1(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
struct ggml_tensor * dst) {
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}

struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;

p->fun(dst, a, params->ith, params->nth, p->userdata);
}

// ggml_compute_forward_map_custom2

static void ggml_compute_forward_map_custom2(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
struct ggml_tensor * dst) {
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}

struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;

p->fun(dst, a, b, params->ith, params->nth, p->userdata);
}

// ggml_compute_forward_map_custom3

static void ggml_compute_forward_map_custom3(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
const struct ggml_tensor * c,
struct ggml_tensor * dst,
const ggml_custom3_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
struct ggml_tensor * dst) {
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}

struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;

p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
}

// ggml_compute_forward_cross_entropy_loss
@@ -14834,25 +15035,40 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM1:
case GGML_OP_MAP_CUSTOM1_F32:
{
ggml_custom1_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM2_F32:
{
ggml_custom2_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM3_F32:
{
ggml_custom3_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM1:
{
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
}
break;
case GGML_OP_MAP_CUSTOM2:
{
ggml_custom2_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
}
break;
case GGML_OP_MAP_CUSTOM3:
{
ggml_custom3_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
}
break;
case GGML_OP_CROSS_ENTROPY_LOSS:
@@ -15660,6 +15876,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY:
case GGML_OP_MAP_CUSTOM1_F32:
case GGML_OP_MAP_CUSTOM2_F32:
case GGML_OP_MAP_CUSTOM3_F32:
case GGML_OP_MAP_CUSTOM1:
case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3:
@@ -16449,12 +16668,39 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
case GGML_OP_WIN_UNPART:
case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY:
case GGML_OP_MAP_CUSTOM1:
case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3:
case GGML_OP_MAP_CUSTOM1_F32:
case GGML_OP_MAP_CUSTOM2_F32:
case GGML_OP_MAP_CUSTOM3_F32:
{
n_tasks = 1;
} break;
case GGML_OP_MAP_CUSTOM1:
{
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
if (p->n_tasks == GGML_N_TASKS_MAX) {
n_tasks = n_threads;
} else {
n_tasks = MIN(p->n_tasks, n_threads);
}
} break;
case GGML_OP_MAP_CUSTOM2:
{
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
if (p->n_tasks == GGML_N_TASKS_MAX) {
n_tasks = n_threads;
} else {
n_tasks = MIN(p->n_tasks, n_threads);
}
} break;
case GGML_OP_MAP_CUSTOM3:
{
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
if (p->n_tasks == GGML_N_TASKS_MAX) {
n_tasks = n_threads;
} else {
n_tasks = MIN(p->n_tasks, n_threads);
}
} break;
case GGML_OP_CROSS_ENTROPY_LOSS:
{
n_tasks = n_threads;
ggml.h (145 changed lines)
@@ -183,6 +183,15 @@
# define GGML_API
#endif

// TODO: support for clang
#ifdef __GNUC__
# define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
#elif defined(_MSC_VER)
# define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
#else
# define GGML_DEPRECATED(func, hint) func
#endif

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
@@ -374,6 +383,10 @@ extern "C" {
GGML_OP_MAP_UNARY,
GGML_OP_MAP_BINARY,

GGML_OP_MAP_CUSTOM1_F32,
GGML_OP_MAP_CUSTOM2_F32,
GGML_OP_MAP_CUSTOM3_F32,

GGML_OP_MAP_CUSTOM1,
GGML_OP_MAP_CUSTOM2,
GGML_OP_MAP_CUSTOM3,
@@ -570,6 +583,8 @@ extern "C" {
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);

GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);

// use this to compute the memory overhead of a tensor
GGML_API size_t ggml_tensor_overhead(void);
@@ -1240,7 +1255,7 @@ extern "C" {

// conv_1d with padding = half
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
GGML_API struct ggml_tensor * ggml_conv_1d_ph(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -1253,7 +1268,7 @@ extern "C" {
GGML_OP_POOL_COUNT,
};

GGML_API struct ggml_tensor* ggml_pool_1d(
GGML_API struct ggml_tensor * ggml_pool_1d(
struct ggml_context * ctx,
struct ggml_tensor * a,
enum ggml_op_pool op,
@@ -1261,7 +1276,7 @@ extern "C" {
int s0, // stride
int p0); // padding

GGML_API struct ggml_tensor* ggml_pool_2d(
GGML_API struct ggml_tensor * ggml_pool_2d(
struct ggml_context * ctx,
struct ggml_tensor * a,
enum ggml_op_pool op,
@@ -1315,15 +1330,6 @@ extern "C" {
int h0,
int w);

// custom operators

typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);

typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);

GGML_API struct ggml_tensor * ggml_unary(
struct ggml_context * ctx,
struct ggml_tensor * a,
@@ -1334,63 +1340,138 @@ extern "C" {
struct ggml_tensor * a,
enum ggml_unary_op op);

GGML_API struct ggml_tensor * ggml_map_unary_f32(
// custom operators

typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);

typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);

GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_unary_op_f32_t fun);
ggml_unary_op_f32_t fun),
"use ggml_map_custom1 instead");

GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_unary_op_f32_t fun);
ggml_unary_op_f32_t fun),
"use ggml_map_custom1_inplace instead");

GGML_API struct ggml_tensor * ggml_map_binary_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_binary_op_f32_t fun);
ggml_binary_op_f32_t fun),
"use ggml_map_custom2 instead");

GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_binary_op_f32_t fun);
ggml_binary_op_f32_t fun),
"use ggml_map_custom2_inplace instead");

GGML_API struct ggml_tensor * ggml_map_custom1_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_f32_t fun);
ggml_custom1_op_f32_t fun),
"use ggml_map_custom1 instead");

GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_f32_t fun);
ggml_custom1_op_f32_t fun),
"use ggml_map_custom1_inplace instead");

GGML_API struct ggml_tensor * ggml_map_custom2_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_f32_t fun);
ggml_custom2_op_f32_t fun),
"use ggml_map_custom2 instead");

GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_f32_t fun);
ggml_custom2_op_f32_t fun),
"use ggml_map_custom2_inplace instead");

GGML_API struct ggml_tensor * ggml_map_custom3_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_f32_t fun);
ggml_custom3_op_f32_t fun),
"use ggml_map_custom3 instead");

GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_f32_t fun);
ggml_custom3_op_f32_t fun),
"use ggml_map_custom3_inplace instead");

// custom operators v2

typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);

#define GGML_N_TASKS_MAX -1

GGML_API struct ggml_tensor * ggml_map_custom1(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_t fun,
int n_tasks,
void * userdata);

GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_t fun,
int n_tasks,
void * userdata);

GGML_API struct ggml_tensor * ggml_map_custom2(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_t fun,
int n_tasks,
void * userdata);

GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_t fun,
int n_tasks,
void * userdata);

GGML_API struct ggml_tensor * ggml_map_custom3(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_t fun,
int n_tasks,
void * userdata);

GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_t fun,
int n_tasks,
void * userdata);

// loss function
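To show how the deprecated f32 map operators above translate to the v2 API, here is a hedged sketch of a binary callback replacing a ggml_map_binary_f32-style call: the per-element loop moves into a ggml_custom2_op_t that splits rows across the nth threads. The callback custom_max_f32 is illustrative, not part of ggml:

#include "ggml.h"

// Illustrative ggml_custom2_op_t: elementwise maximum of two F32 tensors of the same shape.
static void custom_max_f32(struct ggml_tensor * dst,
                           const struct ggml_tensor * a,
                           const struct ggml_tensor * b,
                           int ith, int nth, void * userdata) {
    (void) userdata; // unused in this sketch
    const int64_t nrows = ggml_nrows(a);
    for (int64_t r = ith; r < nrows; r += nth) { // each thread takes every nth row
        const float * pa = (const float *) ((const char *) a->data + r*a->nb[1]);
        const float * pb = (const float *) ((const char *) b->data + r*b->nb[1]);
        float * pd = (float *) ((char *) dst->data + r*dst->nb[1]);
        for (int64_t i = 0; i < a->ne[0]; ++i) {
            pd[i] = pa[i] > pb[i] ? pa[i] : pb[i];
        }
    }
}

// Usage (sketch), replacing a deprecated ggml_map_binary_f32 call:
//   struct ggml_tensor * m = ggml_map_custom2(ctx, a, b, custom_max_f32, GGML_N_TASKS_MAX, NULL);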