From 65b0561637511d3565c4b6f3d48dac4e28690241 Mon Sep 17 00:00:00 2001 From: xaedes Date: Fri, 18 Aug 2023 20:25:03 +0200 Subject: [PATCH] remove unnecessary src tensor from ggml_get_rows_back we don't need data of src[2] for computation, only to setup the correct output shape. remove dependency on src[2], so that allocator can work more freely. the computational graph is still completely determined, because the output shape is naturally included. this is similar to how ggml_reshape does it. --- ggml.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/ggml.c b/ggml.c index 90b610721..24f06b4ca 100644 --- a/ggml.c +++ b/ggml.c @@ -6564,7 +6564,9 @@ struct ggml_tensor * ggml_get_rows_back( result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->src[0] = a; result->src[1] = b; - result->src[2] = c; + // we don't need data of c for computation, only to setup the correct output shape. + // break dependency on c, so that allocator can work more freely. + //result->src[2] = c; return result; } @@ -11282,14 +11284,15 @@ static void ggml_compute_forward_get_rows_back_f32_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - const struct ggml_tensor * opt0, struct ggml_tensor * dst) { GGML_ASSERT(params->ith == 0); - GGML_ASSERT(ggml_are_same_shape(opt0, dst)); - GGML_ASSERT(ggml_is_contiguous(opt0)); GGML_ASSERT(ggml_is_contiguous(dst)); - ggml_compute_forward_dup_same_cont(params, opt0, dst); + // ggml_compute_forward_dup_same_cont(params, opt0, dst); + + if (params->type == GGML_TASK_INIT) { + memset(dst->data, 0, ggml_nbytes(dst)); + } if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; @@ -11315,11 +11318,8 @@ static void ggml_compute_forward_get_rows_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - const struct ggml_tensor * opt0, struct ggml_tensor * dst) { GGML_ASSERT(params->ith == 0); - GGML_ASSERT(ggml_are_same_shape(opt0, dst)); - GGML_ASSERT(ggml_is_contiguous(opt0)); GGML_ASSERT(ggml_is_contiguous(dst)); // ggml_compute_forward_dup_same_cont(params, opt0, dst); @@ -11353,16 +11353,15 @@ static void ggml_compute_forward_get_rows_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, - const struct ggml_tensor * opt0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { - ggml_compute_forward_get_rows_back_f32_f16(params, src0, src1, opt0, dst); + ggml_compute_forward_get_rows_back_f32_f16(params, src0, src1, dst); } break; case GGML_TYPE_F32: { - ggml_compute_forward_get_rows_back_f32(params, src0, src1, opt0, dst); + ggml_compute_forward_get_rows_back_f32(params, src0, src1, dst); } break; default: { @@ -14867,7 +14866,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm } break; case GGML_OP_GET_ROWS_BACK: { - ggml_compute_forward_get_rows_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor); + ggml_compute_forward_get_rows_back(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_DIAG: { @@ -15524,6 +15523,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor if (src0->grad) { src0->grad = ggml_add_or_set(ctx, src0->grad, + // last ggml_get_rows_back argument src0->grad is only + // necessary to setup correct output shape ggml_get_rows_back(ctx, tensor->grad, src1, src0->grad), zero_table); }