fix test-grad0 for soft_max

don't use only sum as the aggregation, because the sum of softmax is always 1, so finite differences cannot detect errors in the gradient
instead use sum(log(soft_max()*(1-eps)+eps)); the eps avoids log(0)
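
Restating the reasoning above as a worked equation: softmax outputs a probability distribution, so the old aggregate is constant in its inputs and its true gradient is identically zero, which makes a finite-difference check vacuous:

\[
\sum_i \operatorname{softmax}_i(x) = 1
\quad\Longrightarrow\quad
\frac{\partial}{\partial x_j} \sum_i \operatorname{softmax}_i(x) = 0 \quad \text{for every } j.
\]

The replacement aggregate

\[
f(x) = \sum_i \log\bigl((1 - \varepsilon)\,\operatorname{softmax}_i(x) + \varepsilon\bigr),
\qquad \varepsilon = 10^{-6},
\]

depends nontrivially on every input, and \(\varepsilon\) keeps the argument of the logarithm strictly positive.
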
Author: xaedes
Date:   2023-07-02 20:58:52 +02:00
Parent: 2d1e6e0675
Commit: 864e7e3aa1

@@ -1345,9 +1345,18 @@ int main(int argc, const char ** argv) {
 x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
 ggml_set_param(ctx0, x[0]);
-struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0]));
+float eps = 1e-6f;
+// don't use only sum as the aggregation, because the sum of softmax is always 1, so finite differences cannot detect errors in the gradient
+// instead use sum(log(soft_max()*(1-eps)+eps)); the eps avoids log(0)
+struct ggml_tensor * f = ggml_sum(ctx0,
+    ggml_log(ctx0,
+        ggml_add1(ctx0,
+            ggml_scale(ctx0,
+                ggml_soft_max(ctx0, x[0]),
+                ggml_new_f32(ctx0, 1.0f - eps)),
+            ggml_new_f32(ctx0, eps))));
-check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY);
 }
 }
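
A minimal standalone sketch of the same point in plain C (hypothetical demo code, not part of this commit or of test-grad0; the helper names are made up): a central finite difference of sum(softmax(x)) comes out as roughly zero no matter which input is perturbed, while the log-based aggregate responds to the perturbation.

// demo.c - why sum(softmax(x)) is useless for finite-difference gradient checks
#include <math.h>
#include <stdio.h>

#define N 4

// plain softmax over N elements, with max subtraction for numerical stability
static void softmax(const float * x, float * y) {
    float max = x[0];
    for (int i = 1; i < N; ++i) if (x[i] > max) max = x[i];
    float sum = 0.0f;
    for (int i = 0; i < N; ++i) { y[i] = expf(x[i] - max); sum += y[i]; }
    for (int i = 0; i < N; ++i) y[i] /= sum;
}

// old aggregate: sum of softmax outputs (always ~1)
static float agg_sum(const float * x) {
    float y[N], s = 0.0f;
    softmax(x, y);
    for (int i = 0; i < N; ++i) s += y[i];
    return s;
}

// new aggregate: sum(log(softmax(x)*(1-eps)+eps)), eps keeps log's argument > 0
static float agg_log(const float * x) {
    const float eps = 1e-6f;
    float y[N], s = 0.0f;
    softmax(x, y);
    for (int i = 0; i < N; ++i) s += logf(y[i]*(1.0f - eps) + eps);
    return s;
}

int main(void) {
    float x[N] = { 0.1f, -0.3f, 0.7f, 0.2f };
    const float h = 1e-3f;

    // perturb x[0] by +/-h for a central finite difference
    float xp[N], xm[N];
    for (int i = 0; i < N; ++i) { xp[i] = x[i]; xm[i] = x[i]; }
    xp[0] += h; xm[0] -= h;

    printf("d sum(softmax)/dx0    ~ %g\n", (agg_sum(xp) - agg_sum(xm)) / (2.0f*h)); // ~0, carries no information
    printf("d sum(log(...))/dx0   ~ %g\n", (agg_log(xp) - agg_log(xm)) / (2.0f*h)); // clearly nonzero
    return 0;
}

Compiling with a C compiler (link with -lm) and running it prints a near-zero estimate for the first aggregate and a clearly nonzero one for the second.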