ggml : sync (unary ops refactor, static-correctness) (#2370)
* ggml : sync (unary ops, tests) ggml-ci * tests : remove unnecessary funcs
This commit is contained in:
parent
42f70cb2f6
commit
5b2b2dc6ae
6 changed files with 867 additions and 572 deletions
|
@ -64,7 +64,7 @@ void get_random_dims(int64_t * dims, int ndims) {
|
|||
}
|
||||
}
|
||||
|
||||
struct ggml_tensor * get_random_tensor(
|
||||
struct ggml_tensor * get_random_tensor_f32(
|
||||
struct ggml_context * ctx0,
|
||||
int ndims,
|
||||
int64_t ne[],
|
||||
|
@ -112,7 +112,55 @@ struct ggml_tensor * get_random_tensor(
|
|||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * get_random_tensor_int(
|
||||
struct ggml_tensor * get_random_tensor_f16(
|
||||
struct ggml_context * ctx0,
|
||||
int ndims,
|
||||
int64_t ne[],
|
||||
float fmin,
|
||||
float fmax) {
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne);
|
||||
|
||||
switch (ndims) {
|
||||
case 1:
|
||||
for (int i0 = 0; i0 < ne[0]; i0++) {
|
||||
((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int i1 = 0; i1 < ne[1]; i1++) {
|
||||
for (int i0 = 0; i0 < ne[0]; i0++) {
|
||||
((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for (int i2 = 0; i2 < ne[2]; i2++) {
|
||||
for (int i1 = 0; i1 < ne[1]; i1++) {
|
||||
for (int i0 = 0; i0 < ne[0]; i0++) {
|
||||
((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
for (int i3 = 0; i3 < ne[3]; i3++) {
|
||||
for (int i2 = 0; i2 < ne[2]; i2++) {
|
||||
for (int i1 = 0; i1 < ne[1]; i1++) {
|
||||
for (int i0 = 0; i0 < ne[0]; i0++) {
|
||||
((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
};
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * get_random_tensor_i32(
|
||||
struct ggml_context * ctx0,
|
||||
int ndims,
|
||||
int64_t ne[],
|
||||
|
@ -160,23 +208,6 @@ struct ggml_tensor * get_random_tensor_int(
|
|||
return result;
|
||||
}
|
||||
|
||||
float get_element(const struct ggml_tensor * t, int idx) {
|
||||
if (t->type == GGML_TYPE_F32) {
|
||||
return ((float *)t->data)[idx];
|
||||
}
|
||||
|
||||
if (t->type == GGML_TYPE_I32) {
|
||||
return ((int32_t *)t->data)[idx];
|
||||
}
|
||||
|
||||
assert(false);
|
||||
return INFINITY;
|
||||
}
|
||||
|
||||
void set_element(struct ggml_tensor * t, int idx, float value) {
|
||||
((float *)t->data)[idx] = value;
|
||||
}
|
||||
|
||||
void print_elements(const char* label, const struct ggml_tensor * t) {
|
||||
if (!t) {
|
||||
printf("%s: %s = null\n", __func__, label);
|
||||
|
@ -186,7 +217,7 @@ void print_elements(const char* label, const struct ggml_tensor * t) {
|
|||
printf("%s: %s = [", __func__, label);
|
||||
for (int k = 0; k < nelements; ++k) {
|
||||
if (k > 0) { printf(", "); }
|
||||
printf("%.5f", get_element(t, k));
|
||||
printf("%.5f", ggml_get_f32_1d(t, k));
|
||||
}
|
||||
printf("] shape: [");
|
||||
for (int k = 0; k < t->n_dims; ++k) {
|
||||
|
@ -237,23 +268,23 @@ bool check_gradient(
|
|||
const int nelements = ggml_nelements(x[i]);
|
||||
for (int k = 0; k < nelements; ++k) {
|
||||
// compute gradient using finite differences
|
||||
const float x0 = get_element(x[i], k);
|
||||
const float x0 = ggml_get_f32_1d(x[i], k);
|
||||
const float xm = x0 - eps;
|
||||
const float xp = x0 + eps;
|
||||
set_element(x[i], k, xp);
|
||||
ggml_set_f32_1d(x[i], k, xp);
|
||||
|
||||
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
|
||||
|
||||
const float f0 = ggml_get_f32_1d(f, 0);
|
||||
|
||||
set_element(x[i], k, xm);
|
||||
ggml_set_f32_1d(x[i], k, xm);
|
||||
|
||||
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
|
||||
|
||||
const float f1 = ggml_get_f32_1d(f, 0);
|
||||
const float g0 = (f0 - f1)/(2.0f*eps);
|
||||
|
||||
set_element(x[i], k, x0);
|
||||
ggml_set_f32_1d(x[i], k, x0);
|
||||
|
||||
// compute gradient using backward graph
|
||||
ggml_graph_reset (&gf);
|
||||
|
@ -261,7 +292,7 @@ bool check_gradient(
|
|||
|
||||
ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);
|
||||
|
||||
const float g1 = get_element(x[i]->grad, k);
|
||||
const float g1 = ggml_get_f32_1d(x[i]->grad, k);
|
||||
|
||||
const float error_abs = fabsf(g0 - g1);
|
||||
const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0;
|
||||
|
@ -392,19 +423,35 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
struct ggml_tensor * x[MAX_NARGS];
|
||||
|
||||
// add
|
||||
// add f32
|
||||
{
|
||||
const int nargs = 2;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
|
||||
|
||||
check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
|
||||
check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
|
||||
}
|
||||
}
|
||||
|
||||
// add f16
|
||||
{
|
||||
const int nargs = 2;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
|
||||
|
||||
check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -414,7 +461,7 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
|
@ -430,7 +477,7 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
|
@ -446,7 +493,7 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
|
@ -462,7 +509,7 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
|
@ -478,7 +525,7 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
|
@ -494,7 +541,7 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
|
@ -510,7 +557,7 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
|
@ -527,7 +574,7 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
|
@ -537,6 +584,40 @@ int main(int argc, const char ** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
// mean, not yet fully implemented
|
||||
if(0)
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0]));
|
||||
|
||||
check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
|
||||
}
|
||||
}
|
||||
|
||||
// argmax
|
||||
if (0)
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0]));
|
||||
|
||||
check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
|
||||
}
|
||||
}
|
||||
|
||||
// repeat
|
||||
{
|
||||
int64_t ne2[4];
|
||||
|
@ -549,15 +630,36 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
const int nargs = 1;
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));
|
||||
|
||||
check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
|
||||
}
|
||||
}
|
||||
|
||||
// repeat back
|
||||
{
|
||||
int64_t ne2[4];
|
||||
get_random_dims(ne2, 4);
|
||||
|
||||
ne2[0] = ne[0] * ne2[0];
|
||||
ne2[1] = ne[1] * ne2[1];
|
||||
ne2[2] = 1;
|
||||
ne2[3] = 1;
|
||||
|
||||
const int nargs = 1;
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0]))));
|
||||
|
||||
check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
|
||||
}
|
||||
}
|
||||
|
||||
// abs (finite differences do not work)
|
||||
|
@ -566,7 +668,7 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
// for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
// for (int i = 0; i < nargs; ++i) {
|
||||
// x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
// x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
// ggml_set_param(ctx0, x[i]);
|
||||
// }
|
||||
|
||||
|
@ -576,17 +678,82 @@ int main(int argc, const char ** argv) {
|
|||
// }
|
||||
//}
|
||||
|
||||
// sgn
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0]));
|
||||
|
||||
check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
|
||||
}
|
||||
}
|
||||
|
||||
// neg
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0]));
|
||||
|
||||
check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
|
||||
}
|
||||
}
|
||||
|
||||
// step
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0]));
|
||||
|
||||
check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
|
||||
}
|
||||
}
|
||||
|
||||
// tanh, not yet fully implemented
|
||||
if(0)
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0]));
|
||||
|
||||
check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
|
||||
}
|
||||
}
|
||||
|
||||
// mul_mat
|
||||
{
|
||||
const int nargs = 2;
|
||||
|
||||
for (int ndims = 2; ndims <= 2; ++ndims) {
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
{
|
||||
int64_t ne2[4];
|
||||
get_random_dims(ne2, 4);
|
||||
ne2[0] = ne[0];
|
||||
x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
}
|
||||
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
@ -602,13 +769,63 @@ int main(int argc, const char ** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
// elu, not yet fully implemented
|
||||
if(0)
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0]));
|
||||
|
||||
check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
|
||||
}
|
||||
}
|
||||
|
||||
// relu
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0]));
|
||||
|
||||
check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
|
||||
}
|
||||
}
|
||||
|
||||
// gelu, not yet fully implemented
|
||||
if(0)
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0]));
|
||||
|
||||
check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
|
||||
}
|
||||
}
|
||||
|
||||
// silu
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
|
@ -629,7 +846,7 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
|
||||
|
@ -647,8 +864,8 @@ int main(int argc, const char ** argv) {
|
|||
ne2[0] = 1;
|
||||
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
ggml_set_param(ctx0, x[1]);
|
||||
|
@ -659,20 +876,37 @@ int main(int argc, const char ** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
// cpy
|
||||
// cpy f32
|
||||
{
|
||||
const int nargs = 2;
|
||||
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
// x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
|
||||
|
||||
check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
|
||||
check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
|
||||
}
|
||||
}
|
||||
|
||||
// cpy f16
|
||||
{
|
||||
const int nargs = 2;
|
||||
|
||||
for (int ndims = 1; ndims <= 2; ++ndims) {
|
||||
for (int i = 0; i < nargs; ++i) {
|
||||
x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[i]);
|
||||
}
|
||||
// x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
|
||||
|
||||
check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -689,8 +923,8 @@ int main(int argc, const char ** argv) {
|
|||
for (int i = 0; i < ndims; ++i) {
|
||||
ne2[0] *= ne[i];
|
||||
}
|
||||
x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
|
||||
|
@ -712,8 +946,8 @@ int main(int argc, const char ** argv) {
|
|||
for (int i = 0; i < ndims; ++i) {
|
||||
ne2[0] *= ne[i];
|
||||
}
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
|
||||
|
@ -729,7 +963,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 2;
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
get_random_dims(ne2, 1);
|
||||
|
@ -737,7 +971,7 @@ int main(int argc, const char ** argv) {
|
|||
get_random_dims(ne2, 1);
|
||||
}
|
||||
|
||||
x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[1]);
|
||||
|
||||
const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
|
||||
|
@ -758,7 +992,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 2;
|
||||
for (int ndims = 2; ndims <= 4; ++ndims) {
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
get_random_dims(ne2, 2);
|
||||
|
@ -766,7 +1000,7 @@ int main(int argc, const char ** argv) {
|
|||
get_random_dims(ne2, 2);
|
||||
}
|
||||
|
||||
x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[1]);
|
||||
|
||||
max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
|
||||
|
@ -790,7 +1024,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 2;
|
||||
for (int ndims = 3; ndims <= 4; ++ndims) {
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
get_random_dims(ne2, 3);
|
||||
|
@ -798,7 +1032,7 @@ int main(int argc, const char ** argv) {
|
|||
get_random_dims(ne2, 3);
|
||||
}
|
||||
|
||||
x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[1]);
|
||||
|
||||
max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
|
||||
|
@ -824,7 +1058,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 2;
|
||||
for (int ndims = 4; ndims <= 4; ++ndims) {
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
get_random_dims(ne2, 4);
|
||||
|
@ -832,7 +1066,7 @@ int main(int argc, const char ** argv) {
|
|||
get_random_dims(ne2, 4);
|
||||
}
|
||||
|
||||
x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[1]);
|
||||
|
||||
max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
|
||||
|
@ -858,7 +1092,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 2;
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
get_random_dims(ne2, 1);
|
||||
|
@ -866,7 +1100,7 @@ int main(int argc, const char ** argv) {
|
|||
get_random_dims(ne2, 1);
|
||||
}
|
||||
|
||||
x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[1]);
|
||||
|
||||
const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
|
||||
|
@ -887,7 +1121,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 1;
|
||||
for (int ndims = 2; ndims <= 4; ++ndims) {
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
get_random_dims(ne2, 2);
|
||||
|
@ -895,7 +1129,7 @@ int main(int argc, const char ** argv) {
|
|||
get_random_dims(ne2, 2);
|
||||
}
|
||||
|
||||
x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[1]);
|
||||
|
||||
max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
|
||||
|
@ -915,7 +1149,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 1;
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
|
@ -941,7 +1175,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 1;
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
|
||||
get_random_dims(ne2, 2);
|
||||
while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
|
||||
|
@ -971,7 +1205,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 1;
|
||||
for (int ndims = 1; ndims <= 4; ++ndims) {
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
|
||||
get_random_dims(ne2, 3);
|
||||
while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
|
||||
|
@ -1010,7 +1244,7 @@ int main(int argc, const char ** argv) {
|
|||
for (int i=ndims; i<4; ++i) {
|
||||
ne2[i] = 1;
|
||||
}
|
||||
x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
|
||||
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
|
@ -1043,7 +1277,7 @@ int main(int argc, const char ** argv) {
|
|||
for (int i=ndims; i<4; ++i) {
|
||||
ne2[i] = 1;
|
||||
}
|
||||
x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
|
||||
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
|
@ -1060,8 +1294,8 @@ int main(int argc, const char ** argv) {
|
|||
int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
|
||||
const int nargs = 1;
|
||||
const int ndims = 2;
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]);
|
||||
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
|
@ -1075,7 +1309,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 1;
|
||||
const int ndims = 2;
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
int n_past = irand(ne[0]);
|
||||
|
@ -1090,7 +1324,7 @@ int main(int argc, const char ** argv) {
|
|||
const int nargs = 1;
|
||||
const int ndims = 2;
|
||||
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
int n_past = irand(ne[0]);
|
||||
|
@ -1108,7 +1342,7 @@ int main(int argc, const char ** argv) {
|
|||
get_random_dims(ne2, 4);
|
||||
|
||||
for (int ndims = 1; ndims <= 3; ++ndims) {
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0]));
|
||||
|
@ -1125,8 +1359,8 @@ int main(int argc, const char ** argv) {
|
|||
get_random_dims(ne2, 4);
|
||||
|
||||
for (int ndims = 1; ndims <= 3; ++ndims) {
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor(ctx0, ndims, ne2, 0.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1]));
|
||||
|
@ -1136,7 +1370,7 @@ int main(int argc, const char ** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
// rope
|
||||
// rope f32
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
|
@ -1148,7 +1382,7 @@ int main(int argc, const char ** argv) {
|
|||
for (int ndims = 3; ndims <= 4; ++ndims) {
|
||||
for (int mode = 0; mode < 4; ++mode) {
|
||||
for (int n_past = 1; n_past < ne2[2]; ++n_past) {
|
||||
x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
|
@ -1163,14 +1397,48 @@ int main(int argc, const char ** argv) {
|
|||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0));
|
||||
|
||||
GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
|
||||
check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
|
||||
GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
|
||||
check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// flash_attn
|
||||
// rope f16
|
||||
{
|
||||
const int nargs = 1;
|
||||
|
||||
int64_t ne2[4];
|
||||
get_random_dims(ne2, 4);
|
||||
ne2[0] += ne2[0] % 2;
|
||||
int n_rot = ne2[0];
|
||||
|
||||
for (int ndims = 3; ndims <= 4; ++ndims) {
|
||||
for (int mode = 0; mode < 4; ++mode) {
|
||||
for (int n_past = 1; n_past < ne2[2]; ++n_past) {
|
||||
x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
||||
const bool skip_past = (mode & 1);
|
||||
if (skip_past) {
|
||||
// we have no past, so this would have to work on uninitialized memory.
|
||||
// we only test the gradients here;
|
||||
// skip_past should have no influence on gradient computation.
|
||||
// so when other modes work, we assume that this does as well.
|
||||
continue;
|
||||
}
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0));
|
||||
|
||||
GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
|
||||
check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// flash_attn f32
|
||||
{
|
||||
const int nargs = 3;
|
||||
|
||||
|
@ -1196,16 +1464,57 @@ int main(int argc, const char ** argv) {
|
|||
nek[3] = 1;
|
||||
nev[3] = 1;
|
||||
}
|
||||
x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f);
|
||||
x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f);
|
||||
x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f);
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f);
|
||||
x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f);
|
||||
x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
ggml_set_param(ctx0, x[1]);
|
||||
ggml_set_param(ctx0, x[2]);
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
|
||||
|
||||
check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f);
|
||||
check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// flash_attn f16, not yet fully implemented
|
||||
if(0)
|
||||
{
|
||||
const int nargs = 3;
|
||||
|
||||
int64_t ne2[4];
|
||||
|
||||
get_random_dims(ne2, 4);
|
||||
int64_t D = ne2[0];
|
||||
int64_t N = ne2[1];
|
||||
int64_t M = ne2[2] + N;
|
||||
int64_t B = ne2[3];
|
||||
|
||||
for (int masked = 0; masked <= 1; ++masked) {
|
||||
for (int ndims = 2; ndims <= 4; ++ndims) {
|
||||
int64_t neq[4] = { D, N, B, ne[3] };
|
||||
int64_t nek[4] = { D, M, B, ne[3] };
|
||||
int64_t nev[4] = { M, D, B, ne[3] };
|
||||
if (ndims == 2) {
|
||||
neq[2] = 1; neq[3] = 1;
|
||||
nek[2] = 1; nek[3] = 1;
|
||||
nev[2] = 1; nev[3] = 1;
|
||||
} else if (ndims == 3) {
|
||||
neq[3] = 1;
|
||||
nek[3] = 1;
|
||||
nev[3] = 1;
|
||||
}
|
||||
x[0] = get_random_tensor_f16(ctx0, ndims, neq, -0.1250f, 0.1250f);
|
||||
x[1] = get_random_tensor_f16(ctx0, ndims, nek, -0.1250f, 0.1250f);
|
||||
x[2] = get_random_tensor_f16(ctx0, ndims, nev, -0.1250f, 0.1250f);
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
ggml_set_param(ctx0, x[1]);
|
||||
ggml_set_param(ctx0, x[2]);
|
||||
|
||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
|
||||
|
||||
check_gradient("flash_attn f16", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue