implement 8 of 14 missing backward pass operations used by llama

- GGML_OP_ADD_AT
- GGML_OP_CPY
- GGML_OP_MUL_MAT (src0.grad)
- GGML_OP_PERMUTE
- GGML_OP_RESHAPE
- GGML_OP_SCALE
- GGML_OP_TRANSPOSE
- GGML_OP_VIEW

implement the additional ggml operation GGML_OP_ADD_AT, which is necessary for the backward pass of GGML_OP_VIEW.

this operation adds src1 to src0 at a given data offset, i.e. to view(src0, ..., offset).
the result is returned in a tensor the size of src0. values outside of [data+offset : data+offset+nbytes(src1)] are just the original values from src0.
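
for intuition, here is a minimal sketch of those semantics on plain float buffers (illustrative only, not the ggml implementation; all names here are made up):

#include <stdio.h>
#include <string.h>

// add_at on flat buffers: dst is a copy of src0, with src1 added into
// the byte range [offset, offset + n1*sizeof(float)); offset is in bytes.
static void add_at_sketch(float * dst, const float * src0, size_t n0,
                          const float * src1, size_t n1, size_t offset) {
    memcpy(dst, src0, n0*sizeof(float));            // outside the range: src0 passes through
    float * at = (float *) ((char *) dst + offset);
    for (size_t i = 0; i < n1; ++i) {
        at[i] += src1[i];                           // inside the range: src0 + src1
    }
}

int main(void) {
    const float src0[5] = {1, 1, 1, 1, 1};
    const float src1[2] = {10, 20};
    float dst[5];
    add_at_sketch(dst, src0, 5, src1, 2, 1*sizeof(float));
    for (int i = 0; i < 5; ++i) printf("%g ", dst[i]); // prints: 1 11 21 1 1
    return 0;
}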

still missing backward passes for llama:

- GGML_OP_DIAG_MASK_INF
- GGML_OP_GET_ROWS
- GGML_OP_RMS_NORM
- GGML_OP_ROPE
- GGML_OP_SILU
- GGML_OP_SOFT_MAX
author: xaedes, 2023-05-01 02:39:54 +02:00
parent: 7ff0dcd320
commit: 73ac18d856
2 changed files with 568 additions and 40 deletions

ggml.c (595 changes)

@@ -4966,6 +4966,47 @@ struct ggml_tensor * ggml_add_inplace(
return ggml_add_impl(ctx, a, b, true);
}
struct ggml_tensor * ggml_add_at_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
size_t offset,
bool inplace) {
GGML_ASSERT(ggml_are_same_shape(a, b));
bool is_node = false;
if (!inplace && (a->grad || b->grad)) {
is_node = true;
}
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
result->op = GGML_OP_ADD_AT;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = b;
memcpy(result->padding, &offset, sizeof(size_t));
return result;
}
struct ggml_tensor * ggml_add_at(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
size_t offset) {
return ggml_add_at_impl(ctx, a, b, offset, false);
}
struct ggml_tensor * ggml_add_at_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
size_t offset) {
return ggml_add_at_impl(ctx, a, b, offset, true);
}
// ggml_sub
struct ggml_tensor * ggml_sub_impl(
@@ -5577,7 +5618,6 @@ struct ggml_tensor * ggml_scale_impl(
bool is_node = false;
if (!inplace && (a->grad || b->grad)) {
GGML_ASSERT(false); // TODO: implement backward
is_node = true;
}
@@ -5619,7 +5659,6 @@ struct ggml_tensor * ggml_cpy_impl(
bool is_node = false;
if (!inplace && (a->grad || b->grad)) {
GGML_ASSERT(false); // TODO: implement backward
is_node = true;
}
@@ -5695,11 +5734,15 @@ struct ggml_tensor * ggml_reshape(
bool is_node = false;
if (a->grad || b->grad) {
GGML_ASSERT(false); // TODO: implement backward
if (a->grad) {
is_node = true;
}
if (b->grad) {
// gradient propagation is not supported
GGML_ASSERT(false);
}
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a->data);
result->op = GGML_OP_RESHAPE;
@@ -5721,7 +5764,6 @@ struct ggml_tensor * ggml_reshape_2d(
bool is_node = false;
if (a->grad) {
GGML_ASSERT(false); // TODO: implement backward
is_node = true;
}
@@ -5748,7 +5790,6 @@ struct ggml_tensor * ggml_reshape_3d(
bool is_node = false;
if (a->grad) {
GGML_ASSERT(false); // TODO: implement backward
is_node = true;
}
@@ -5770,16 +5811,23 @@ struct ggml_tensor * ggml_view_1d(
struct ggml_tensor * a,
int64_t ne0,
size_t offset) {
bool is_node = false;
if (a->grad) {
GGML_ASSERT(false); // gradient propagation is not supported
is_node = true;
}
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
result->op = GGML_OP_VIEW;
result->grad = NULL;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = NULL; // TODO: maybe store the offset here?
result->src1 = NULL;
if (is_node) {
memcpy(result->padding, &offset, sizeof(size_t));
}
return result;
}
@@ -5793,8 +5841,11 @@ struct ggml_tensor * ggml_view_2d(
int64_t ne1,
size_t nb1,
size_t offset) {
bool is_node = false;
if (a->grad) {
GGML_ASSERT(false); // gradient propagation is not supported
is_node = true;
}
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
@@ -5806,9 +5857,13 @@ struct ggml_tensor * ggml_view_2d(
result->nb[3] = result->nb[2];
result->op = GGML_OP_VIEW;
result->grad = NULL;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = NULL; // TODO: maybe store the offset here?
result->src1 = NULL;
if (is_node) {
memcpy(result->padding, &offset, sizeof(size_t));
}
return result;
}
@@ -5824,8 +5879,11 @@ struct ggml_tensor * ggml_view_3d(
size_t nb1,
size_t nb2,
size_t offset) {
bool is_node = false;
if (a->grad) {
GGML_ASSERT(false); // gradient propagation is not supported
is_node = true;
}
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
@@ -5837,9 +5895,13 @@ struct ggml_tensor * ggml_view_3d(
result->nb[3] = result->nb[2]*ne2;
result->op = GGML_OP_VIEW;
result->grad = NULL;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = NULL; // TODO: maybe store the offset here?
result->src1 = NULL;
if (is_node) {
memcpy(result->padding, &offset, sizeof(size_t));
}
return result;
}
@@ -5868,7 +5930,6 @@ struct ggml_tensor * ggml_permute(
bool is_node = false;
if (a->grad) {
GGML_ASSERT(false); // TODO: implement backward
is_node = true;
}
@@ -5900,7 +5961,14 @@ struct ggml_tensor * ggml_permute(
result->op = GGML_OP_PERMUTE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = NULL; // TODO: maybe store the permutation here?
result->src1 = NULL;
if (is_node) {
result->padding[0] = axis0;
result->padding[1] = axis1;
result->padding[2] = axis2;
result->padding[3] = axis3;
}
return result;
}
@@ -5913,7 +5981,6 @@ struct ggml_tensor * ggml_transpose(
bool is_node = false;
if (a->grad) {
GGML_ASSERT(false); // TODO: implement backward
is_node = true;
}
@@ -7206,6 +7273,318 @@ static void ggml_compute_forward_add(
}
}
// ggml_compute_forward_add_at
static void ggml_compute_forward_add_at_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst,
size_t offset) {
// GGML_ASSERT(ggml_are_same_shape(src0, src1)); // TODO: assert that offset+len(src1) <= len(src0)
GGML_ASSERT(ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
const int ith = params->ith;
const int nth = params->nth;
const int n = ggml_nrows(src0);
const int nc = src0->ne[0];
const size_t nb00 = src0->nb[0];
const size_t nb01 = src0->nb[1];
const size_t nb10 = src1->nb[0];
const size_t nb11 = src1->nb[1];
const size_t nb0 = dst->nb[0];
const size_t nb1 = dst->nb[1];
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
if (nb10 == sizeof(float)) {
for (int j = ith; j < n; j += nth) {
#ifdef GGML_USE_ACCELERATE
vDSP_vadd(
(float *) ((char *) src0->data + j*nb01 + offset), 1,
(float *) ((char *) src1->data + j*nb11), 1,
(float *) ((char *) dst->data + j*nb1 + offset), 1, nc);
#else
ggml_vec_add_f32(nc,
(float *) ((char *) dst->data + j*nb1 + offset),
(float *) ((char *) src0->data + j*nb01 + offset),
(float *) ((char *) src1->data + j*nb11));
#endif
}
} else {
// src1 is not contiguous
for (int j = ith; j < n; j += nth) {
float * dst_ptr = (float *) ((char *) dst->data + j*nb1 + offset);
float * src0_ptr = (float *) ((char *) src0->data + j*nb01 + offset);
for (int i = 0; i < nc; i++) {
float * src1_ptr = (float *) ((char *) src1->data + j*nb11 + i*nb10);
dst_ptr[i] = src0_ptr[i] + *src1_ptr;
}
}
}
}
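rows are striped across threads exactly as in ggml_compute_forward_add: thread ith handles rows ith, ith+nth, ith+2*nth, ...; with nth = 4, for example, thread 0 touches rows 0, 4, 8, ... and thread 3 rows 3, 7, 11, ... the byte offset shifts the src0 and dst row pointers identically, so the sum lands on the same slice of both tensors.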
static void ggml_compute_forward_add_at_f16_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst,
size_t offset) {
// GGML_ASSERT(ggml_are_same_shape(src0, src1)); // TODO: assert that offset+len(src1) <= len(src0)
GGML_ASSERT(ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
const int ith = params->ith;
const int nth = params->nth;
const int n = ggml_nrows(src0);
const int nc = src0->ne[0];
const size_t nb00 = src0->nb[0];
const size_t nb01 = src0->nb[1];
const size_t nb10 = src1->nb[0];
const size_t nb11 = src1->nb[1];
const size_t nb0 = dst->nb[0];
const size_t nb1 = dst->nb[1];
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT(dst->type == GGML_TYPE_F16);
GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
if (nb10 == sizeof(float)) {
for (int j = ith; j < n; j += nth) {
ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + j*nb1 + offset);
ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + j*nb01 + offset);
for (int i = 0; i < nc; i++) {
float * src1_ptr = (float *) ((char *) src1->data + j*nb11 + i*nb10);
dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + *src1_ptr);
}
}
}
else {
// src1 is not contiguous
GGML_ASSERT(false);
}
}
static void ggml_compute_forward_add_at_f16_f16(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst,
size_t offset) {
// GGML_ASSERT(ggml_are_same_shape(src0, src1)); // TODO: assert that offset+len(src1) <= len(src0)
GGML_ASSERT(ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
const int ith = params->ith;
const int nth = params->nth;
const int n = ggml_nrows(src0);
const int nc = src0->ne[0];
const size_t nb00 = src0->nb[0];
const size_t nb01 = src0->nb[1];
const size_t nb10 = src1->nb[0];
const size_t nb11 = src1->nb[1];
const size_t nb0 = dst->nb[0];
const size_t nb1 = dst->nb[1];
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F16);
GGML_ASSERT(dst->type == GGML_TYPE_F16);
GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
if (nb10 == sizeof(ggml_fp16_t)) {
for (int j = ith; j < n; j += nth) {
ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + j*nb1 + offset);
ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + j*nb01 + offset);
for (int i = 0; i < nc; i++) {
ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + j*nb11 + i*nb10);
dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + GGML_FP16_TO_FP32(*src1_ptr));
}
}
}
else {
// src1 is not contiguous
GGML_ASSERT(false);
}
}
static void ggml_compute_forward_add_at_q_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst,
size_t offset) {
// GGML_ASSERT(ggml_are_same_shape(src0, src1)); // TODO: assert that offset+len(src1) <= len(src0)
GGML_ASSERT(ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
const int64_t ne00 = src0->ne[0];
const int64_t ne01 = src0->ne[1];
const int64_t ne02 = src0->ne[2];
const int64_t ne03 = src0->ne[3];
//const int64_t ne10 = src1->ne[0];
//const int64_t ne11 = src1->ne[1];
const int64_t ne12 = src1->ne[2];
const int64_t ne13 = src1->ne[3];
//const int64_t ne0 = dst->ne[0];
//const int64_t ne1 = dst->ne[1];
const int64_t ne2 = dst->ne[2];
const int64_t ne3 = dst->ne[3];
const int nb00 = src0->nb[0];
const int nb01 = src0->nb[1];
const int nb02 = src0->nb[2];
const int nb03 = src0->nb[3];
const int nb10 = src1->nb[0];
const int nb11 = src1->nb[1];
const int nb12 = src1->nb[2];
const int nb13 = src1->nb[3];
const int nb0 = dst->nb[0];
const int nb1 = dst->nb[1];
const int nb2 = dst->nb[2];
const int nb3 = dst->nb[3];
const int ith = params->ith;
const int nth = params->nth;
GGML_ASSERT(ne02 == ne12);
GGML_ASSERT(ne03 == ne13);
GGML_ASSERT(ne2 == ne12);
GGML_ASSERT(ne3 == ne13);
const enum ggml_type type = src0->type;
dequantize_row_q_t const dequantize_row_q = quantize_fns[type].dequantize_row_q;
quantize_row_q_t const quantize_row_q = quantize_fns[type].quantize_row_q;
// we don't support permuted src0 or src1
GGML_ASSERT(nb00 == (int) GGML_TYPE_SIZE[type]);
GGML_ASSERT(nb10 == sizeof(float));
// dst cannot be transposed or permuted
GGML_ASSERT(nb0 <= nb1);
GGML_ASSERT(nb1 <= nb2);
GGML_ASSERT(nb2 <= nb3);
GGML_ASSERT(ggml_is_quantized(src0->type));
GGML_ASSERT(dst->type == src0->type);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
// total rows in src0
const int nr = ne01*ne02*ne03;
// rows per thread
const int dr = (nr + nth - 1)/nth;
// row range for this thread
const int ir0 = dr*ith;
const int ir1 = MIN(ir0 + dr, nr);
float * wdata = (float *) params->wdata + (ne00 + CACHE_LINE_SIZE_F32) * ith;
for (int ir = ir0; ir < ir1; ++ir) {
// src0 indices
const int i03 = ir/(ne02*ne01);
const int i02 = (ir - i03*ne02*ne01)/ne01;
const int i01 = (ir - i03*ne02*ne01 - i02*ne01);
// src1 and dst are same shape as src0 => same indices
const int i13 = i03;
const int i12 = i02;
const int i11 = i01;
const int i3 = i03;
const int i2 = i02;
const int i1 = i01;
void * src0_row = (void *) ((char *) src0->data + (i01*nb01 + i02*nb02 + i03*nb03) + offset);
float * src1_row = (float *)((char *) src1->data + (i11*nb11 + i12*nb12 + i13*nb13));
void * dst_row = (void *) ((char *) dst->data + ( i1*nb1 + i2*nb2 + i3*nb3) + offset);
assert(ne00 % 32 == 0);
// unquantize row from src0 to temp buffer
dequantize_row_q(src0_row, wdata, ne00);
// add src1
ggml_vec_acc_f32(ne00, wdata, src1_row);
// quantize row to dst
quantize_row_q(wdata, dst_row, ne00);
}
}
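the quantized path cannot add in place on packed data, so each row makes a round trip: dequantize the src0 row into this thread's f32 slice of wdata, accumulate the src1 row with ggml_vec_acc_f32, then re-quantize into dst. slices are spaced ne00 + CACHE_LINE_SIZE_F32 floats apart so threads never share a cache line.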
static void ggml_compute_forward_add_at(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
size_t offset;
memcpy(&offset, dst->padding, sizeof(size_t));
switch (src0->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_add_at_f32(params, src0, src1, dst, offset);
} break;
case GGML_TYPE_F16:
{
if (src1->type == GGML_TYPE_F16) {
ggml_compute_forward_add_at_f16_f16(params, src0, src1, dst, offset);
}
else if (src1->type == GGML_TYPE_F32) {
ggml_compute_forward_add_at_f16_f32(params, src0, src1, dst, offset);
}
else {
GGML_ASSERT(false);
}
} break;
case GGML_TYPE_Q4_0:
case GGML_TYPE_Q4_1:
case GGML_TYPE_Q4_2:
case GGML_TYPE_Q4_3:
{
ggml_compute_forward_add_at_q_f32(params, src0, src1, dst, offset);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}
// ggml_compute_forward_sub
static void ggml_compute_forward_sub_f32(
@@ -9220,44 +9599,45 @@ static void ggml_compute_forward_soft_max_f32(
const int ir1 = MIN(ir0 + dr, nr);
for (int i1 = ir0; i1 < ir1; i1++) {
float *p = (float *)((char *) dst->data + i1*dst->nb[1]);
float *sp = (float *)((char *) src0->data + i1*src0->nb[1]);
float *dp = (float *)((char *) dst->data + i1*dst->nb[1]);
#ifndef NDEBUG
for (int i = 0; i < nc; ++i) {
//printf("p[%d] = %f\n", i, p[i]);
assert(!isnan(p[i]));
//printf("sp[%d] = %f\n", i, sp[i]);
assert(!isnan(sp[i]));
}
#endif
float max = -INFINITY;
ggml_vec_max_f32(nc, &max, p);
ggml_vec_max_f32(nc, &max, sp);
ggml_float sum = 0.0;
uint16_t scvt;
for (int i = 0; i < nc; i++) {
//printf("p[%3d] = %8.4f\n", i, p[i]);
if (p[i] == -INFINITY) {
p[i] = 0.0f;
//printf("sp[%3d] = %8.4f\n", i, sp[i]);
if (sp[i] == -INFINITY) {
dp[i] = 0.0f;
} else {
//const float val = (p[i] == -INFINITY) ? 0.0 : exp(p[i] - max);
ggml_fp16_t s = GGML_FP32_TO_FP16(p[i] - max);
ggml_fp16_t s = GGML_FP32_TO_FP16(sp[i] - max);
memcpy(&scvt, &s, sizeof(scvt));
const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
sum += (ggml_float)val;
p[i] = val;
dp[i] = val;
}
}
assert(sum > 0.0);
sum = 1.0/sum;
ggml_vec_scale_f32(nc, p, sum);
ggml_vec_scale_f32(nc, dp, sum);
#ifndef NDEBUG
for (int i = 0; i < nc; ++i) {
assert(!isnan(p[i]));
assert(!isinf(p[i]));
assert(!isnan(dp[i]));
assert(!isinf(dp[i]));
}
#endif
}
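the soft_max changes are mechanical but deliberate: reads now go through sp (src0) and writes through dp (dst) instead of the single aliased p pointer, so the op no longer silently assumes in-place operation and src0 survives the forward pass. that is groundwork for the still-missing backward pass: for y = softmax(x), dL/dx_i = y_i * (dL/dy_i - sum_j dL/dy_j * y_j), which is computed from the forward output y.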
@@ -10956,6 +11336,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
{
ggml_compute_forward_add(params, tensor->src0, tensor->src1, tensor);
} break;
case GGML_OP_ADD_AT:
{
ggml_compute_forward_add_at(params, tensor->src0, tensor->src1, tensor);
} break;
case GGML_OP_SUB:
{
ggml_compute_forward_sub(params, tensor->src0, tensor->src1, tensor);
@@ -11140,6 +11524,28 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
src1->grad = ggml_add_impl(ctx, src1->grad, tensor->grad, inplace);
}
} break;
case GGML_OP_ADD_AT:
{
if (src0->grad) {
src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
}
if (src1->grad) {
size_t offset;
memcpy(&offset, tensor->padding, sizeof(size_t));
src1->grad =
ggml_add_impl(ctx,
src1->grad,
ggml_view_3d(ctx,
tensor->grad,
tensor->ne[0],
tensor->ne[1],
tensor->ne[2],
tensor->nb[1],
tensor->nb[2],
offset),
inplace);
}
} break;
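the backward rule follows from the forward semantics: dst equals src0 everywhere plus src1 inside the window, so d src0 = d dst (the whole gradient passes through unchanged) and d src1 = the slice of d dst at the stored byte offset, which is what the ggml_view_3d over tensor->grad extracts.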
case GGML_OP_SUB:
{
if (src0->grad) {
@@ -11284,6 +11690,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_SILU:
{
// necessary for llama
GGML_ASSERT(false); // TODO: not implemented
} break;
case GGML_OP_NORM:
@@ -11292,31 +11699,83 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_RMS_NORM:
{
// necessary for llama
GGML_ASSERT(false); // TODO: not implemented
} break;
case GGML_OP_MUL_MAT:
{
// https://cs231n.github.io/optimization-2/#staged
// # forward pass
// s0 = np.random.randn(5, 10)
// s1 = np.random.randn(10, 3)
// t = s0.dot(s1)
// # now suppose we had the gradient on t from above in the circuit
// dt = np.random.randn(*t.shape) # same shape as t
// ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
// ds1 = s0.T.dot(dt)
// tensor.T == (src0 @ src1.T).T
// tensor.shape [m,p]
// src0.shape [n,m]
// src1.shape [n,p]
// necessary for llama
if (src0->grad) {
// TODO: this requires outer product - ggml_out_prod(ctx, src1, tensor->grad);
GGML_ASSERT(false);
src0->grad =
ggml_add_impl(ctx,
src0->grad,
// ds0 = dt.dot(s1.T)
// ggml_out_prod(ctx, // [n,m]
// src1, // [n,p]
// tensor->grad), // [m,p]
// for now just using A*B==(B.T*A.T).T
ggml_cont(ctx, // [n,m] not necessary TODO: investigate influence on speed
ggml_transpose(ctx, // [n,m]
ggml_mul_mat(ctx, // [m,n]
ggml_cont(ctx, ggml_transpose(ctx, tensor->grad)), // [p,m]
ggml_cont(ctx, ggml_transpose(ctx, src1))))), // [p,n]
inplace);
}
if (src1->grad) {
src1->grad =
ggml_add_impl(ctx,
src1->grad,
ggml_mul_mat(ctx,
ggml_cont(ctx, ggml_transpose(ctx, src0)),
tensor->grad),
// ds1 = s0.T.dot(dt):
ggml_mul_mat(ctx, // [n,p]
ggml_cont(ctx, ggml_transpose(ctx, src0)), // [m,n]
tensor->grad), // [m,p]
inplace);
}
} break;
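spelling out the identities the code relies on, in the notation of the numpy comment above (the outer-product form is the TODO; the transpose form is what is implemented here):

// t   = s0 @ s1
// ds0 = dt @ s1.T  ==  (s1 @ dt.T).T    // A@B == (B.T@A.T).T, avoids the missing ggml_out_prod
// ds1 = s0.T @ dt

the ggml_cont calls materialize contiguous copies of the transposed operands so ggml_mul_mat can consume them; the TODO on the outer ggml_cont notes that it may be unnecessary.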
case GGML_OP_SCALE:
{
GGML_ASSERT(false); // TODO: not implemented
// necessary for llama
if (src0->grad) {
src0->grad =
ggml_add_impl(ctx,
src0->grad,
ggml_scale_impl(ctx, tensor->grad, src1, false),
inplace);
}
if (src1->grad) {
src1->grad =
ggml_add_impl(ctx,
src1->grad,
ggml_sum(ctx, ggml_mul_impl(ctx, tensor->grad, src0, false)), // dL/ds = sum(dL/dy * x)
inplace);
}
} break;
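for y = s*x with a 1-element scale tensor s, the chain rule gives dx = s*dy — another scale, reusing src1 — and ds = sum_i dy_i * x_i, a full sum over the elementwise product of the incoming gradient and src0 (a mean would be off by a factor of 1/nelements).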
case GGML_OP_CPY:
{
GGML_ASSERT(false); // TODO: not implemented
// necessary for llama
if (src0->grad) {
src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
}
if (src1->grad) {
src1->grad = ggml_add_impl(ctx, src1->grad, tensor->grad, inplace);
}
} break;
case GGML_OP_CONT:
{
@@ -11324,34 +11783,78 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_RESHAPE:
{
GGML_ASSERT(false); // TODO: not implemented
// necessary for llama
if (src0->grad) {
src0->grad =
ggml_add_impl(ctx, src0->grad,
ggml_reshape(ctx, tensor->grad, src1),
inplace);
}
if (src1->grad) {
// noop
}
} break;
case GGML_OP_VIEW:
{
GGML_ASSERT(false); // not supported
// necessary for llama
if (src0->grad) {
size_t offset;
memcpy(&offset, tensor->padding, sizeof(size_t));
src0->grad = ggml_add_at_impl(ctx, src0->grad, tensor->grad, offset, inplace);
}
} break;
case GGML_OP_PERMUTE:
{
GGML_ASSERT(false); // TODO: not implemented
// necessary for llama
if (src0->grad) {
int axis0 = tensor->padding[0] & 0x3;
int axis1 = tensor->padding[1] & 0x3;
int axis2 = tensor->padding[2] & 0x3;
int axis3 = tensor->padding[3] & 0x3;
int axes_backward[4] = {0,0,0,0};
axes_backward[axis0] = 0;
axes_backward[axis1] = 1;
axes_backward[axis2] = 2;
axes_backward[axis3] = 3;
src0->grad =
ggml_add_impl(ctx, src0->grad,
ggml_permute(ctx,
tensor->grad,
axes_backward[0],
axes_backward[1],
axes_backward[2],
axes_backward[3]),
inplace);
}
} break;
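the inverse permutation just inverts the index map: forward sends source axis i to position axis_i, so axes_backward[axis_i] = i sends it back. a standalone sketch of the identity (hypothetical, outside ggml):

#include <assert.h>

int main(void) {
    const int axes[4] = {2, 0, 1, 3}; // forward: source axis i ends up at position axes[i]
    int inv[4];
    for (int i = 0; i < 4; ++i) {
        inv[axes[i]] = i;             // backward: position axes[i] goes back to i
    }
    // composing forward then backward is the identity permutation
    for (int i = 0; i < 4; ++i) {
        assert(inv[axes[i]] == i);
    }
    return 0;
}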
case GGML_OP_TRANSPOSE:
{
GGML_ASSERT(false); // TODO: not implemented
// necessary for llama
if (src0->grad) {
src0->grad =
ggml_add_impl(ctx, src0->grad,
ggml_transpose(ctx, tensor->grad),
inplace);
}
} break;
case GGML_OP_GET_ROWS:
{
// necessary for llama
GGML_ASSERT(false); // TODO: not implemented
} break;
case GGML_OP_DIAG_MASK_INF:
{
// necessary for llama
GGML_ASSERT(false); // TODO: not implemented
} break;
case GGML_OP_SOFT_MAX:
{
// necessary for llama
GGML_ASSERT(false); // TODO: not implemented
} break;
case GGML_OP_ROPE:
{
// necessary for llama
GGML_ASSERT(false); // TODO: not implemented
} break;
case GGML_OP_CONV_1D_1S:
@@ -11715,6 +12218,18 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->src0->ne[0] * n_threads;
}
work_size = MAX(work_size, cur);
} break;
case GGML_OP_ADD_AT:
{
node->n_tasks = n_threads;
size_t cur = 0;
if (ggml_is_quantized(node->src0->type)) {
cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->src1->ne[0] * n_threads;
}
work_size = MAX(work_size, cur);
} break;
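same sizing rule as GGML_OP_ADD: a quantized src0 needs one dequantized f32 row of scratch per thread, so the work buffer reserves n_threads rows. as a worked example, a row length of 4096 with 8 threads reserves 4 * 4096 * 8 bytes = 128 KiB; the kernel's per-thread pointers additionally skip CACHE_LINE_SIZE_F32 floats, which this estimate leaves out.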
case GGML_OP_SUB:

ggml.h (13 changes)

@@ -252,6 +252,7 @@ extern "C" {
GGML_OP_DUP,
GGML_OP_ADD,
GGML_OP_ADD_AT,
GGML_OP_SUB,
GGML_OP_MUL,
GGML_OP_DIV,
@@ -480,6 +481,18 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);
GGML_API struct ggml_tensor * ggml_add_at(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
size_t offset);
GGML_API struct ggml_tensor * ggml_add_at_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
size_t offset);
GGML_API struct ggml_tensor * ggml_sub(
struct ggml_context * ctx,
struct ggml_tensor * a,