From 2277053839ecf674e5e1e23b00269e4a28ee999a Mon Sep 17 00:00:00 2001
From: xaedes
Date: Sun, 30 Apr 2023 21:42:52 +0200
Subject: [PATCH] add todos for llama backward pass

- implementation for ADD1 backward pass should probably use sum instead of
  mean (but this backward pass is not required)
- repeat is not yet tested and looks like it only works for single element
  src0 inputs.
---
 ggml.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ggml.c b/ggml.c
index 85e5e941c..4fe4d748b 100644
--- a/ggml.c
+++ b/ggml.c
@@ -12873,7 +12873,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 if (src1->grad) {
                     src1->grad = ggml_add_impl(ctx,
                             src1->grad,
-                            ggml_mean(ctx, tensor->grad),
+                            ggml_mean(ctx, tensor->grad), // TODO: should probably be sum instead of mean
                             inplace);
                 }
             } break;
@@ -12986,7 +12986,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_REPEAT:
             {
+                // necessary for llama
                 if (src0->grad) {
+                    // TODO: is this really correct?
+                    // i think tensor->grad must be reshaped to [*src0->ne[[0,1,2]],-1] and then summed along last axis
                     src0->grad = ggml_add_impl(ctx,
                             src0->grad,
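
Note (not part of the patch): below is a minimal standalone sketch in plain C that checks the arithmetic behind both TODOs on a tiny 1-D case. It makes no ggml calls; the sizes N0/NR and the array names are made up for illustration. The REPEAT backward accumulates the upstream gradient by summing over the repeated copies (the "reshape and sum along the last axis" idea from the comment), and the ADD1 backward with respect to the broadcast scalar src1 is a sum over the upstream gradient rather than a mean.

// Standalone sketch, not ggml API: numerically illustrates the two TODOs
// on a 1-D example. N0, NR and the variable names are illustrative only.
#include <stdio.h>

#define N0 3   // elements in src0
#define NR 4   // number of repeats; the repeated tensor has N0*NR elements

int main(void) {
    // upstream gradient dL/dy for y = repeat(x) (or y = x + c for ADD1)
    float dy[N0 * NR];
    for (int i = 0; i < N0 * NR; ++i) {
        dy[i] = 0.1f * (float)(i + 1);
    }

    // REPEAT backward: each x[j] is copied into y[j], y[j + N0], y[j + 2*N0], ...
    // so dL/dx[j] is the SUM of dy over those copies ("reshape to [N0, -1]
    // and sum along the last axis" in the TODO's wording).
    float dx[N0] = {0};
    for (int r = 0; r < NR; ++r) {
        for (int j = 0; j < N0; ++j) {
            dx[j] += dy[r * N0 + j];
        }
    }
    for (int j = 0; j < N0; ++j) {
        printf("d repeat / d src0[%d] = %f\n", j, dx[j]);
    }

    // ADD1 backward w.r.t. the broadcast scalar src1: y[i] = x[i] + c, so
    // dL/dc = sum_i dy[i] -- a SUM; a mean would be too small by a factor
    // equal to the number of elements of dy.
    float dc = 0.0f;
    for (int i = 0; i < N0 * NR; ++i) {
        dc += dy[i];
    }
    printf("d add1 / d src1 (sum) = %f\n", dc);

    return 0;
}

The same reasoning carries over to higher-dimensional tensors element by element; only the 1-D layout here is an assumption made to keep the sketch short.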