From 864e7e3aa1b08c4b2cd8cc2f17e0722fd019ffca Mon Sep 17 00:00:00 2001
From: xaedes
Date: Sun, 2 Jul 2023 20:58:52 +0200
Subject: [PATCH] fix test-grad0 for soft_max

dont use only sum as aggregation, because sum of softmax is always 1 -> finite differences should not work
instead use sum(log(soft_max()*(1-eps)+eps)); use eps to avoid log(0)
---
 tests/test-grad0.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tests/test-grad0.c b/tests/test-grad0.c
index dc19c1ad2..edc7e2834 100644
--- a/tests/test-grad0.c
+++ b/tests/test-grad0.c
@@ -1345,9 +1345,18 @@ int main(int argc, const char ** argv) {
             x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[0]);
 
-            struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0]));
+            float eps = 1e-6f;
+            // dont use only sum as aggregation, because sum of softmax is always 1 -> finite differences should not work
+            // instead use sum(log(soft_max()*(1-eps)+eps)); use eps to avoid log(0)
+            struct ggml_tensor * f = ggml_sum(ctx0,
+                ggml_log(ctx0,
+                    ggml_add1(ctx0,
+                        ggml_scale(ctx0,
+                            ggml_soft_max(ctx0, x[0]),
+                            ggml_new_f32(ctx0, 1.0f - eps)),
+                        ggml_new_f32(ctx0, eps))));
 
-            check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY);
         }
     }
 
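For illustration only (not part of the patch, independent of ggml): a minimal standalone C sketch of the reasoning in the commit message. Because softmax sums to 1 for every input, central finite differences of sum(softmax(x)) are ~0 no matter how x is perturbed, so the numeric gradient carries no information to compare against; the log-based aggregation sum(log(softmax(x)*(1-eps)+eps)) does vary with x, and eps keeps log() away from 0. The vector size, test values, and step size below are illustrative and not taken from test-grad0.c.

/*
 * Standalone sketch: compare central finite differences of two
 * aggregations of softmax. Build with: cc sketch.c -lm
 */
#include <math.h>
#include <stdio.h>

#define N 4

// softmax over a small vector (max-subtracted for numeric stability)
static void softmax(const float * x, float * y) {
    float max = x[0];
    for (int i = 1; i < N; ++i) if (x[i] > max) max = x[i];
    float sum = 0.0f;
    for (int i = 0; i < N; ++i) { y[i] = expf(x[i] - max); sum += y[i]; }
    for (int i = 0; i < N; ++i) y[i] /= sum;
}

// aggregation used before the patch: sum of softmax, which is 1 by construction
static float agg_sum(const float * x) {
    float y[N];
    softmax(x, y);
    float s = 0.0f;
    for (int i = 0; i < N; ++i) s += y[i];
    return s;
}

// aggregation used after the patch: sum(log(softmax*(1-eps)+eps)), eps avoids log(0)
static float agg_log(const float * x) {
    const float eps = 1e-6f;
    float y[N];
    softmax(x, y);
    float s = 0.0f;
    for (int i = 0; i < N; ++i) s += logf(y[i]*(1.0f - eps) + eps);
    return s;
}

int main(void) {
    float x[N] = { 0.3f, -0.8f, 0.1f, 0.5f };  // illustrative input
    const float h = 1e-3f;                     // illustrative finite-difference step

    for (int i = 0; i < N; ++i) {
        float x_p[N], x_m[N];
        for (int j = 0; j < N; ++j) { x_p[j] = x[j]; x_m[j] = x[j]; }
        x_p[i] += h;
        x_m[i] -= h;

        // central finite differences of both aggregations w.r.t. x[i]:
        // the sum aggregation yields ~0 everywhere, the log aggregation does not
        float g_sum = (agg_sum(x_p) - agg_sum(x_m)) / (2.0f*h);
        float g_log = (agg_log(x_p) - agg_log(x_m)) / (2.0f*h);
        printf("d/dx[%d]: sum-agg = %+.6f, log-agg = %+.6f\n", i, g_sum, g_log);
    }
    return 0;
}

With the sum aggregation every printed numeric derivative is ~0 (only float rounding noise), which is why the old test could not meaningfully compare numeric and analytic gradients; the log aggregation produces nonzero derivatives that a gradient check can actually validate.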