add some new ops, fix some operators and add batch operations to certain operators. (ggml/747)
* cuda: fix group_norm
* cuda: add batch inference support for ggml_pad/ggml_upscale
* add ggml_arange
* add ggml_timestep_embedding
* update ggml_arange/ggml_timestep_embedding tests
* cuda: fix im2col
* add ggml_arange/ggml_timestep_embedding support for metal backend
* fix some bugs
* fix some bugs
* Update ggml.h
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update ggml-cuda.cu
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update ggml-metal.m
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update ggml-metal.m
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update ggml-metal.metal
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* modify according to the review comments
* ggml : fix compile warnings + code style
* ggml : normalize compute_forward calls + fix seg fault in debug
* minor

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: slaren <slarengh@gmail.com>
parent 82f3e668ad
commit 7d43c585dc

6 changed files with 550 additions and 52 deletions
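For orientation, here is a minimal usage sketch of the two new graph ops on the CPU backend. This program is not part of the commit; the buffer size, embedding width, and thread count are arbitrary assumptions.

#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // timesteps 0, 1, ..., 9 as a 1-D F32 tensor
    struct ggml_tensor * t = ggml_arange(ctx, 0.0f, 10.0f, 1.0f);

    // one 320-wide sinusoidal embedding row per timestep
    struct ggml_tensor * e = ggml_timestep_embedding(ctx, t, 320, 10000);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, e);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 4);

    printf("emb[0][0] = %f\n", ggml_get_f32_1d(e, 0)); // cos(0 * f) == 1
    ggml_free(ctx);
    return 0;
}

Note that ggml_arange takes no src tensor at all: as the diff below shows, its start/stop/step live entirely in op_params and the output is filled in during graph compute.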
ggml.c | 207
@@ -1822,6 +1822,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "POOL_2D",
     "UPSCALE",
     "PAD",
+    "ARANGE",
+    "TIMESTEP_EMBEDDING",
     "ARGSORT",
     "LEAKY_RELU",
 
@@ -1850,7 +1852,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 72, "GGML_OP_COUNT != 72");
+static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
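The 72 → 74 bump is what keeps the string tables honest: a new GGML_OP enum entry without matching "ARANGE"/"TIMESTEP_EMBEDDING" strings would only leave silent NULL holes in the arrays, so the hand-maintained count assert forces every table to be revisited together with the enum. A hypothetical standalone mini-version of the pattern (not ggml code):

#include <assert.h>

enum my_op { MY_OP_A, MY_OP_B, MY_OP_COUNT };

// sized by the enum count; missing initializers would be silent NULLs
static const char * MY_OP_NAME[MY_OP_COUNT] = { "A", "B" };

// fails to compile until the count is updated alongside the tables
static_assert(MY_OP_COUNT == 2, "MY_OP_COUNT != 2");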
@@ -1908,6 +1910,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "pool_2d(x)",
     "upscale(x)",
     "pad(x)",
+    "arange(start, stop, step)",
+    "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
     "leaky_relu(x)",
 
@@ -1936,7 +1940,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 72, "GGML_OP_COUNT != 72");
+static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -2895,11 +2899,21 @@ static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_
     return ((const int32_t *)(tensor->op_params))[i];
 }
 
+static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
+    return ((const float *)(tensor->op_params))[i];
+}
+
 static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
     assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
     ((int32_t *)(tensor->op_params))[i] = value;
 }
 
+static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
+    ((float *)(tensor->op_params))[i] = value;
+}
+
 struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
     memset(tensor->data, 0, ggml_nbytes(tensor));
     return tensor;
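The new f32 helpers mirror the existing i32 pair: op_params is a small fixed-size byte buffer on each tensor (GGML_MAX_OP_PARAMS bytes), viewed as int32 or float slots, which lets an op carry scalar attributes without allocating extra src tensors. A minimal sketch of the round trip as it would look inside ggml.c (the helpers are file-static; the tensor and values here are hypothetical):

struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
ggml_set_op_params_f32(t, 0, 0.5f);               // write float slot 0
const float alpha = ggml_get_op_params_f32(t, 0); // alpha == 0.5f
// slot indices are asserted against GGML_MAX_OP_PARAMS / sizeof(float)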
@@ -5898,6 +5912,55 @@ struct ggml_tensor * ggml_upscale(
     return ggml_upscale_impl(ctx, a, scale_factor);
 }
 
+struct ggml_tensor * ggml_arange(
+        struct ggml_context * ctx,
+        float start,
+        float stop,
+        float step) {
+
+    GGML_ASSERT(stop > start);
+
+    const int64_t steps = (int64_t) ceilf((stop - start) / step);
+
+    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
+
+    result->op = GGML_OP_ARANGE;
+    ggml_set_op_params_f32(result, 0, start);
+    ggml_set_op_params_f32(result, 1, stop);
+    ggml_set_op_params_f32(result, 2, step);
+
+    return result;
+}
+
+struct ggml_tensor * ggml_timestep_embedding(
+        struct ggml_context * ctx,
+        struct ggml_tensor * timesteps,
+        int dim,
+        int max_period) {
+    bool is_node = false;
+
+    if (timesteps->grad) {
+        GGML_ASSERT(false); // TODO: implement backward
+        is_node = true;
+    }
+
+    int actual_dim = dim;
+    if (dim % 2 != 0) {
+        actual_dim = dim + 1;
+    }
+
+    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
+
+    result->op = GGML_OP_TIMESTEP_EMBEDDING;
+    ggml_set_op_params_i32(result, 0, dim);
+    ggml_set_op_params_i32(result, 1, max_period);
+
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = timesteps;
+
+    return result;
+}
+
 // ggml_argsort
 
 struct ggml_tensor * ggml_argsort(
@@ -10231,7 +10294,7 @@ static void ggml_compute_forward_group_norm_f32(
     int n_channels = src0->ne[2];
     int n_groups = dst->op_params[0];
     int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;
-    for (int i = ith; i < n_groups; i+=nth) {
+    for (int i = ith; i < n_groups; i += nth) {
         int start = i * n_channels_per_group;
         int end = start + n_channels_per_group;
         if (end > n_channels) {
@@ -10245,28 +10308,32 @@ static void ggml_compute_forward_group_norm_f32(
                 for (int64_t i01 = 0; i01 < ne01; i01++) {
                     const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
 
+                    ggml_float sumr = 0.0;
                     for (int64_t i00 = 0; i00 < ne00; i00++) {
-                        sum += (ggml_float)x[i00];
+                        sumr += (ggml_float)x[i00];
                     }
+                    sum += sumr;
                 }
             }
-            float mean = sum / (ne00 * ne01 * step);
-            ggml_float sum2 = 0.0;
+            const float mean = sum / (ne00 * ne01 * step);
 
+            ggml_float sum2 = 0.0;
             for (int64_t i02 = start; i02 < end; i02++) {
                 for (int64_t i01 = 0; i01 < ne01; i01++) {
                     const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
 
                     float * y = (float *)((char *) dst->data + i01 * nb1 + i02 * nb2 + i03 * nb3);
 
+                    ggml_float sumr = 0.0;
                     for (int64_t i00 = 0; i00 < ne00; i00++) {
                         float v = x[i00] - mean;
                         y[i00] = v;
-                        sum2 += (ggml_float)(v * v);
+                        sumr += (ggml_float)(v * v);
                     }
+                    sum2 += sumr;
                 }
             }
-            float variance = sum2 / (ne00 * ne01 * step);
+            const float variance = sum2 / (ne00 * ne01 * step);
             const float scale = 1.0f / sqrtf(variance + eps);
 
             for (int64_t i02 = start; i02 < end; i02++) {
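The group_norm rewrite accumulates each row into a local sumr and folds it into the group-wide sum once per row. Two likely motivations (my reading, not stated in the commit): the inner loop gets a register-resident accumulator that is easier for the compiler to optimize, and two-stage partial summation bounds rounding-error growth compared with one long running sum. A standalone sketch of the pattern, with hypothetical names:

#include <stddef.h>

// sum a row-major nrows x ncols matrix using per-row partial sums
double sum_rows_partial(const float * a, size_t nrows, size_t ncols) {
    double sum = 0.0;
    for (size_t r = 0; r < nrows; r++) {
        double sumr = 0.0;                 // short-lived row accumulator
        for (size_t c = 0; c < ncols; c++) {
            sumr += (double) a[r*ncols + c];
        }
        sum += sumr;                       // fold once per row
    }
    return sum;
}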
@@ -13547,6 +13614,106 @@ static void ggml_compute_forward_pad(
     }
 }
 
+// ggml_compute_forward_arange
+
+static void ggml_compute_forward_arange_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
+        return;
+    }
+
+    GGML_ASSERT(dst->nb[0] == sizeof(float));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const float start = ggml_get_op_params_f32(dst, 0);
+    const float stop = ggml_get_op_params_f32(dst, 1);
+    const float step = ggml_get_op_params_f32(dst, 2);
+
+    const int64_t steps = (int64_t) ceilf((stop - start) / step);
+
+    GGML_ASSERT(ggml_nelements(dst) == steps);
+
+    for (int64_t i = ith; i < steps; i+= nth) {
+        float value = start + step * i;
+        ((float *)dst->data)[i] = value;
+    }
+}
+
+static void ggml_compute_forward_arange(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+    switch (dst->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_arange_f32(params, dst);
+            } break;
+        default:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
+static void ggml_compute_forward_timestep_embedding_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
+        return;
+    }
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    GGML_ASSERT(src0->nb[0] == sizeof(float));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    GGML_TENSOR_UNARY_OP_LOCALS
+
+    const int dim = ggml_get_op_params_i32(dst, 0);
+    const int max_period = ggml_get_op_params_i32(dst, 1);
+
+    int half = dim / 2;
+
+    for (int64_t i = 0; i < ne00; i++) {
+        float * embed_data = (float *)((char *) dst->data + i*nb1);
+        for (int64_t j = ith; j < half; j += nth) {
+            float timestep = ((float *)src0->data)[i];
+            float freq = (float)expf(-logf(max_period) * j / half);
+            float arg = timestep * freq;
+            embed_data[j] = cosf(arg);
+            embed_data[j + half] = sinf(arg);
+        }
+        if (dim % 2 != 0 && ith == 0) {
+            embed_data[dim] = 0.f;
+        }
+    }
+}
+
+static void ggml_compute_forward_timestep_embedding(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_timestep_embedding_f32(params, dst);
+            } break;
+        default:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
 // ggml_compute_forward_argsort
 
 static void ggml_compute_forward_argsort_f32(
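For reference (my gloss, not from the commit): for even dim, with half = dim/2 and t_i the i-th timestep, the rows computed above are the standard diffusion-style sinusoidal timestep embedding

$$ f_j = \exp\!\left(-\frac{j \,\ln(\text{max\_period})}{\text{half}}\right) = \text{max\_period}^{-j/\text{half}}, \qquad 0 \le j < \text{half}, $$

$$ \text{emb}[i][j] = \cos(t_i f_j), \qquad \text{emb}[i][j + \text{half}] = \sin(t_i f_j). $$

For odd dim, ggml_timestep_embedding allocates dim + 1 columns and the forward pass zeroes the final column, keeping each row even-sized.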
@@ -15615,6 +15782,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_pad(params, tensor);
             } break;
+        case GGML_OP_ARANGE:
+            {
+                ggml_compute_forward_arange(params, tensor);
+            } break;
+        case GGML_OP_TIMESTEP_EMBEDDING:
+            {
+                ggml_compute_forward_timestep_embedding(params, tensor);
+            } break;
         case GGML_OP_ARGSORT:
             {
                 ggml_compute_forward_argsort(params, tensor);
@@ -16617,6 +16792,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 GGML_ASSERT(false); // TODO: not implemented
             } break;
+        case GGML_OP_ARANGE:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
+        case GGML_OP_TIMESTEP_EMBEDDING:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
         case GGML_OP_ARGSORT:
             {
                 GGML_ASSERT(false); // TODO: not implemented
@@ -17368,6 +17551,14 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             {
                 n_tasks = n_threads;
             } break;
+        case GGML_OP_ARANGE:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_TIMESTEP_EMBEDDING:
+            {
+                n_tasks = n_threads;
+            } break;
         case GGML_OP_ARGSORT:
             {
                 n_tasks = n_threads;