add todos for llama backward pass
- implementation for ADD1 backward pass should probably use sum instead of mean (but this backward pass is not required)
- repeat is not yet tested and looks like it only works for single-element src0 inputs.
parent 2ecc690980
commit 2277053839

1 changed file with 4 additions and 1 deletion
ggml.c (5 changes: +4 −1)
@@ -12873,7 +12873,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 if (src1->grad) {
                     src1->grad = ggml_add_impl(ctx,
                             src1->grad,
-                            ggml_mean(ctx, tensor->grad),
+                            ggml_mean(ctx, tensor->grad), // TODO: should probably be sum instead of mean
                             inplace);
                 }
             } break;
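Note on the first TODO: ADD1 broadcasts a single-element src1 across src0, so every output element depends on src1 once, and the chain rule accumulates the upstream gradient over all of them; taking the mean divides that accumulation by the element count. A minimal standalone sketch of the difference, in plain C rather than the ggml API, with made-up gradient values:

#include <stdio.h>

/* ADD1 computes y[i] = x[i] + b for a single-element b broadcast over x,
 * so dL/db = sum_i dL/dy[i]: b contributes to every output element once. */
int main(void) {
    const int n = 4;
    float grad_y[4] = {0.1f, 0.2f, 0.3f, 0.4f}; /* upstream gradient dL/dy (made-up values) */

    float grad_b_sum = 0.0f;
    for (int i = 0; i < n; i++) {
        grad_b_sum += grad_y[i];
    }
    float grad_b_mean = grad_b_sum / n; /* what ggml_mean would yield */

    printf("dL/db via sum:  %f\n", grad_b_sum);  /* 1.0, the correct accumulation */
    printf("dL/db via mean: %f\n", grad_b_mean); /* 0.25, off by a factor of n */
    return 0;
}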
@@ -12986,7 +12986,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_REPEAT:
             {
+                // necessary for llama
                 if (src0->grad) {
+                    // TODO: is this really correct?
+                    // i think tensor->grad must be reshaped to [*src0->ne[[0,1,2]],-1] and then summed along last axis
                     src0->grad =
                         ggml_add_impl(ctx,
                             src0->grad,
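Note on the repeat TODO: if the forward op tiles src0 into the output, the backward pass has to sum the upstream gradient over every copy of each src0 element, which is what the "reshape and sum along the last axis" comment describes. A minimal 1-D sketch of that reduction, in plain C rather than the ggml API; repeat_backward_1d is a hypothetical helper, and it assumes the output tiles src0 contiguously as out[j*n + i] = src0[i]:

#include <stdio.h>

/* src0 has length n and is repeated r times; each copy of src0[i] receives
 * upstream gradient, so grad_src[i] is the sum over all r copies. This is
 * the "reshape grad to [n, r], sum over r" picture for the simplest case. */
static void repeat_backward_1d(const float * grad_out, float * grad_src, int n, int r) {
    for (int i = 0; i < n; i++) {
        float acc = 0.0f;
        for (int j = 0; j < r; j++) {
            acc += grad_out[j * n + i];
        }
        grad_src[i] = acc;
    }
}

int main(void) {
    /* 2 elements repeated 3 times; with an all-ones upstream gradient,
     * each src0 element should end up with a gradient of 3. */
    float grad_out[6] = {1, 1, 1, 1, 1, 1};
    float grad_src[2];
    repeat_backward_1d(grad_out, grad_src, 2, 3);
    printf("%f %f\n", grad_src[0], grad_src[1]); /* 3.000000 3.000000 */
    return 0;
}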