add todos for llama backward pass

- the implementation of the ADD1 backward pass should probably use sum instead of mean (but this backward pass is not required)
- the repeat backward pass is not yet tested and looks like it only works for single-element src0 inputs
xaedes 2023-04-30 21:42:52 +02:00
parent 2ecc690980
commit 2277053839

ggml.c

@@ -12873,7 +12873,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 if (src1->grad) {
                     src1->grad = ggml_add_impl(ctx,
                             src1->grad,
-                            ggml_mean(ctx, tensor->grad),
+                            ggml_mean(ctx, tensor->grad), // TODO: should probably be sum instead of mean
                             inplace);
                 }
             } break;
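
As a sanity check on the TODO above, here is a minimal standalone sketch (plain C, not ggml code), assuming ADD1 computes y[i] = x[i] + c with a single value c broadcast over all elements: the gradient of the loss with respect to c is the sum of the incoming gradient, not the mean, and a finite-difference check agrees with the sum.

    /*
     * Standalone sketch (not ggml code). Assumption: ADD1 computes
     * y[i] = x[i] + c for a single broadcast value c, and the loss is
     * L = 0.5 * sum(y[i]^2), so dL/dy[i] = y[i].
     */
    #include <stdio.h>

    #define N 4

    int main(void) {
        float x[N] = {1.0f, -2.0f, 3.0f, 0.5f};
        float c = 0.25f;

        // analytic gradients w.r.t. c via backprop
        float grad_sum  = 0.0f;  // sum_i dL/dy[i]  (what the backward pass should produce)
        float grad_mean = 0.0f;  // mean_i dL/dy[i] (what ggml_mean would produce)
        for (int i = 0; i < N; i++) {
            float y = x[i] + c;
            grad_sum  += y;
            grad_mean += y / N;
        }

        // finite-difference estimate of dL/dc
        const float eps = 1e-3f;
        float L_plus = 0.0f, L_minus = 0.0f;
        for (int i = 0; i < N; i++) {
            float yp = x[i] + (c + eps);
            float ym = x[i] + (c - eps);
            L_plus  += 0.5f * yp * yp;
            L_minus += 0.5f * ym * ym;
        }
        float grad_fd = (L_plus - L_minus) / (2.0f * eps);

        printf("sum  : %f\n", grad_sum);   // matches the finite-difference value
        printf("mean : %f\n", grad_mean);  // off by a factor of N
        printf("fd   : %f\n", grad_fd);
        return 0;
    }
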
@@ -12986,7 +12986,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_REPEAT:
             {
+                // necessary for llama
                 if (src0->grad) {
+                    // TODO: is this really correct?
+                    // i think tensor->grad must be reshaped to [*src0->ne[[0,1,2]],-1] and then summed along last axis
                     src0->grad =
                         ggml_add_impl(ctx,
                             src0->grad,
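
And a minimal standalone sketch of the reshape-and-sum idea from the TODO above (plain C, not ggml code; the memory layout of the repeated copies is an assumption here): each src0 element receives the sum of the output gradient over all of its copies, which collapses to a plain sum of the whole incoming gradient only when src0 has a single element.

    /*
     * Standalone sketch (not ggml code). Assumption: repeat tiles src0
     * (N elements) K times, so out[k*N + i] = src0[i]. The backward pass
     * for src0[i] is then the sum of the output gradient over its K copies,
     * i.e. reshape the incoming gradient to [N, K] and sum along the last
     * axis. Only for N == 1 does this equal summing the whole gradient.
     */
    #include <stdio.h>

    #define N 3   // number of elements in src0
    #define K 4   // number of repeats

    int main(void) {
        // incoming gradient dL/d(out); out has N*K elements
        float grad_out[N * K];
        for (int j = 0; j < N * K; j++) {
            grad_out[j] = (float) (j + 1);  // arbitrary upstream gradient
        }

        // backward: grad_src0[i] = sum over the K copies of element i
        float grad_src0[N] = {0};
        for (int k = 0; k < K; k++) {
            for (int i = 0; i < N; i++) {
                grad_src0[i] += grad_out[k * N + i];
            }
        }

        for (int i = 0; i < N; i++) {
            printf("grad_src0[%d] = %f\n", i, grad_src0[i]);
        }
        return 0;
    }
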