ggml : add ggml_upscale_ext (ggml/814)

* initial commit with CPU implementation of upscale to shape and test, cuda implementation next

* experimental commit to see if dst shape is correct

* test version

* test

* removed unnecessary params

* refactor

* fixed tests

* ggml : metal impl + cleanup + sycl dev warnings

* patched ggml_upscale cuda op to handle non-contiguous tensors, added test for non-contiguous behavior

* metal : fix upsacle op to support nb00 + style

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
John Balis 2024-05-15 03:52:33 -05:00 committed by Georgi Gerganov
parent 583fd6b000
commit 48aa8fd1f2
No known key found for this signature in database
GPG key ID: BF970631944C16B7
7 changed files with 146 additions and 60 deletions

64
ggml.c
View file

@ -6293,7 +6293,10 @@ struct ggml_tensor * ggml_pool_2d(
static struct ggml_tensor * ggml_upscale_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
int scale_factor) {
int ne0,
int ne1,
int ne2,
int ne3) {
bool is_node = false;
if (a->grad) {
@ -6301,19 +6304,45 @@ static struct ggml_tensor * ggml_upscale_impl(
is_node = true;
}
GGML_ASSERT(a->ne[0] <= ne0);
GGML_ASSERT(a->ne[1] <= ne1);
GGML_ASSERT(a->ne[2] <= ne2);
GGML_ASSERT(a->ne[3] <= ne3);
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
a->ne[0] * scale_factor,
a->ne[1] * scale_factor,
a->ne[2], a->ne[3]);
ne0,
ne1,
ne2,
ne3
);
result->op = GGML_OP_UPSCALE;
result->op_params[0] = scale_factor;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
return result;
}
struct ggml_tensor * ggml_upscale(
struct ggml_context * ctx,
struct ggml_tensor * a,
int scale_factor) {
return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
}
struct ggml_tensor * ggml_upscale_ext(
struct ggml_context * ctx,
struct ggml_tensor * a,
int ne0,
int ne1,
int ne2,
int ne3) {
return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
}
// ggml_pad
struct ggml_tensor * ggml_pad(
struct ggml_context * ctx,
struct ggml_tensor * a,
@ -6338,12 +6367,7 @@ struct ggml_tensor * ggml_pad(
return result;
}
struct ggml_tensor * ggml_upscale(
struct ggml_context * ctx,
struct ggml_tensor * a,
int scale_factor) {
return ggml_upscale_impl(ctx, a, scale_factor);
}
// ggml_arange
struct ggml_tensor * ggml_arange(
struct ggml_context * ctx,
@ -6365,6 +6389,8 @@ struct ggml_tensor * ggml_arange(
return result;
}
// ggml_timestep_embedding
struct ggml_tensor * ggml_timestep_embedding(
struct ggml_context * ctx,
struct ggml_tensor * timesteps,
@ -14820,25 +14846,28 @@ static void ggml_compute_forward_upscale_f32(
return;
}
GGML_ASSERT(src0->nb[0] == sizeof(float));
GGML_ASSERT(src0->type == GGML_TYPE_F32);
const int ith = params->ith;
const int nth = params->nth;
GGML_TENSOR_UNARY_OP_LOCALS
const int scale_factor = dst->op_params[0];
const float sf0 = (float)ne0/src0->ne[0];
const float sf1 = (float)ne1/src0->ne[1];
const float sf2 = (float)ne2/src0->ne[2];
const float sf3 = (float)ne3/src0->ne[3];
// TODO: optimize
for (int64_t i3 = 0; i3 < ne3; i3++) {
const int64_t i03 = i3;
const int64_t i03 = i3 / sf3;
for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
const int64_t i02 = i2;
const int64_t i02 = i2 / sf2;
for (int64_t i1 = 0; i1 < ne1; i1++) {
const int64_t i01 = i1 / scale_factor;
const int64_t i01 = i1 / sf1;
for (int64_t i0 = 0; i0 < ne0; i0++) {
const int64_t i00 = i0 / scale_factor;
const int64_t i00 = i0 / sf0;
const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
@ -14868,6 +14897,7 @@ static void ggml_compute_forward_upscale(
}
}
// ggml_compute_forward_pad
static void ggml_compute_forward_pad_f32(