add todos for llama backward pass

- the implementation of the ADD1 backward pass should probably use sum instead of mean (but this backward pass is not required)
- repeat is not yet tested and looks like it only works for single-element src0 inputs.
xaedes 2023-04-30 21:42:52 +02:00
parent 2ecc690980
commit 2277053839

ggml.c (5 changed lines)

@@ -12873,7 +12873,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 if (src1->grad) {
                     src1->grad = ggml_add_impl(ctx,
                             src1->grad,
-                            ggml_mean(ctx, tensor->grad),
+                            ggml_mean(ctx, tensor->grad), // TODO: should probably be sum instead of mean
                             inplace);
                 }
             } break;
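
For context (a sketch, not code from this commit): in GGML_OP_ADD1, src1 is a single-element tensor that is broadcast-added to every element of src0, so by the chain rule the gradient with respect to src1 is the sum of tensor->grad over all elements; taking the mean is off by a factor of the element count. A minimal standalone C check with plain arrays (no ggml calls; x, w, and a are illustrative names, with w standing in for tensor->grad):

#include <stdio.h>

// Forward: y[i] = x[i] + a, where a is a scalar broadcast over x (like GGML_OP_ADD1).
// Loss:    L = sum_i w[i] * y[i], where w plays the role of tensor->grad.
// Chain rule: dL/da = sum_i w[i]  -- a sum over all elements, not a mean.
int main(void) {
    const int n = 4;
    const double x[4] = { 0.5, -1.0, 2.0, 3.0 };
    const double w[4] = { 1.0,  2.0, 0.5, -1.0 };  // upstream gradient dL/dy
    double a = 0.25;

    double grad_sum = 0.0;
    for (int i = 0; i < n; ++i) grad_sum += w[i];
    double grad_mean = grad_sum / n;

    // finite-difference check of dL/da
    const double eps = 1e-6;
    double l_plus = 0.0, l_minus = 0.0;
    for (int i = 0; i < n; ++i) {
        l_plus  += w[i] * (x[i] + (a + eps));
        l_minus += w[i] * (x[i] + (a - eps));
    }
    double grad_fd = (l_plus - l_minus) / (2.0 * eps);

    printf("sum(w)  = %f\n", grad_sum);   // 2.500000, matches the finite difference
    printf("mean(w) = %f\n", grad_mean);  // 0.625000, off by a factor of n
    printf("fd      = %f\n", grad_fd);    // ~2.500000
    return 0;
}

The finite difference agrees with sum(w), not mean(w), which is why the TODO above suggests summing instead of taking the mean.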
@@ -12986,7 +12986,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_REPEAT:
             {
+                // necessary for llama
                 if (src0->grad) {
+                    // TODO: is this really correct?
+                    // i think tensor->grad must be reshaped to [*src0->ne[[0,1,2]],-1] and then summed along last axis
                     src0->grad =
                         ggml_add_impl(ctx,
                             src0->grad,
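
For context (a sketch, not code from this commit): ggml_repeat tiles src0 until it fills the larger target shape, so each element of src0 is copied into several output positions, and the backward pass has to accumulate tensor->grad over all of those positions. That is what reshaping tensor->grad to [*src0->ne, -1] and summing along the last axis amounts to. A minimal 1-D illustration in plain C (no ggml calls):

#include <stdio.h>

// 1-D repeat: src0 of length n0 tiled r times into an output of length n0*r,
// so out[i + k*n0] = src0[i] for k = 0..r-1.
// Backward: dL/dsrc0[i] is the SUM of the upstream gradient over those positions,
// i.e. reshape grad_out to [n0, r] and sum along the last axis.
int main(void) {
    const int n0 = 3, r = 4, n1 = n0 * r;

    double grad_out[12];                       // upstream gradient (tensor->grad)
    for (int j = 0; j < n1; ++j) grad_out[j] = (double)(j + 1);

    double grad_src0[3] = { 0.0, 0.0, 0.0 };
    for (int k = 0; k < r; ++k) {
        for (int i = 0; i < n0; ++i) {
            grad_src0[i] += grad_out[i + k * n0];
        }
    }

    for (int i = 0; i < n0; ++i) {
        printf("grad_src0[%d] = %f\n", i, grad_src0[i]);  // 22, 26, 30
    }
    return 0;
}

When src0 has only a single element, this accumulation collapses to a plain sum over all of tensor->grad, which would explain why the existing backward pass appears to work only for single-element src0 inputs, as noted in the commit message.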