CUDA: backwards pass for misc. ops, add tests (#11257)

* CUDA: backwards pass for misc. ops, add tests

* remove restrict from pointers

This commit is contained in:
parent 681149ced2
commit 9c8dcefe17

18 changed files with 930 additions and 332 deletions (only the hunks from tests/test-backend-ops.cpp are shown below)
@@ -780,7 +780,7 @@ struct test_case {
             }
         }
         if (!any_params) {
-            printf("not supported [%s] \n", op_name);
+            printf("not supported [%s] \n", op_desc(out).c_str());
             supported = false;
         }
         if (!supported) {
@@ -1130,6 +1130,59 @@ struct test_get_rows : public test_case {
     }
 };
 
+// GGML_OP_GET_ROWS_BACK
+struct test_get_rows_back : public test_case {
+    const ggml_type type;
+    const int n; // cols
+    const int m; // rows
+    const int r; // rows to get
+    const int b; // batch size
+    const bool v; // view (non-contiguous src1)
+
+    std::string vars() override {
+        return VARS_TO_STR6(type, n, m, r, b, v);
+    }
+
+    test_get_rows_back(ggml_type type = GGML_TYPE_F32, int n = 10, int m = 5, int r = 3, int b = 1, bool v = false)
+        : type(type), n(n), m(m), r(r), b(b), v(v) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * in_forward = ggml_new_tensor_3d(ctx, type, n, m, b);
+        ggml_set_name(in_forward, "in_forward");
+
+        ggml_tensor * rows = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, r, b);
+        ggml_set_name(rows, "rows");
+        if (v) {
+            rows = ggml_view_2d(ctx, rows, r/2, b, rows->nb[1], 0);
+            ggml_set_name(rows, "view_of_rows");
+        }
+
+        ggml_tensor * grad = ggml_new_tensor_3d(ctx, type, n, r, b);
+        ggml_set_name(grad, "grad");
+
+        ggml_tensor * out = ggml_get_rows_back(ctx, grad, rows, in_forward);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            if (t->type == GGML_TYPE_I32) {
+                if (ggml_is_view_op(t->op)) { continue; }
+                // rows
+                std::vector<int> data(r*b);
+                for (int i = 0; i < r*b; i++) {
+                    data[i] = rand() % m;
+                }
+                ggml_backend_tensor_set(t, data.data(), 0, r * b * sizeof(int));
+            } else {
+                init_tensor_uniform(t);
+            }
+        }
+    }
+};
+
 // GGML_OP_ARGMAX
 struct test_argmax : public test_case {
     const ggml_type type;
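For context on what this new test exercises (not part of the diff): ggml_get_rows_back builds the gradient of get_rows by scatter-adding the incoming gradient rows into a zero tensor shaped like the forward input; in_forward contributes only its shape. A minimal single-batch sketch of the expected semantics, assuming F32 data and contiguous rows of length n (the helper name is illustrative):

    // Scatter-add reference for get_rows_back: rows picked more than once
    // in the forward pass accumulate their gradients.
    void get_rows_back_ref(const float * grad, const int * rows, float * out,
                           int n, int m, int r) {
        for (int j = 0; j < m * n; j++) {
            out[j] = 0.0f; // the gradient accumulator starts at zero
        }
        for (int i = 0; i < r; i++) {
            for (int j = 0; j < n; j++) {
                out[rows[i] * n + j] += grad[i * n + j];
            }
        }
    }

This is also why initialize_tensors fills the I32 tensor with values in [0, m): the indices must point at valid rows of in_forward.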
@@ -1531,6 +1584,39 @@ struct test_scale : public test_case {
     }
 };
 
+// GGML_OP_SILU_BACK
+struct test_silu_back : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+    float eps;
+
+    std::string vars() override {
+        return VARS_TO_STR3(type, ne, eps);
+    }
+
+    test_silu_back(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {64, 5, 4, 3},
+            float eps = 1e-6f)
+        : type(type), ne(ne), eps(eps) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(a, "a");
+
+        ggml_tensor * grad = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(grad, "grad");
+
+        ggml_tensor * out = ggml_silu_back(ctx, a, grad);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
+};
+
 // GGML_OP_NORM
 struct test_norm : public test_case {
     const ggml_type type;
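For reference (not part of the diff): SiLU is silu(x) = x * sigmoid(x), so the backward op scales the incoming gradient by silu'(x) = sigmoid(x) * (1 + x * (1 - sigmoid(x))). An elementwise sketch of what the test checks against, assuming F32 (the helper name is illustrative):

    #include <cmath>

    // dx = dy * d/dx [x * sigmoid(x)]
    float silu_back_ref(float x, float dy) {
        const float s = 1.0f / (1.0f + std::exp(-x)); // sigmoid(x)
        return dy * s * (1.0f + x * (1.0f - s));      // dy * silu'(x)
    }

The test also overrides grad_precise(), presumably asking the harness for its more accurate finite-difference estimate when checking this op.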
@@ -1583,11 +1669,56 @@ struct test_rms_norm : public test_case {
         return out;
     }
 
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.f, 10.f);
+        }
+    }
+
     float grad_eps() override {
         return 1.0f;
     }
 
     bool grad_precise() override {
         return true;
     }
 };
 
+// GGML_OP_RMS_NORM_BACK
+struct test_rms_norm_back : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+    float eps;
+
+    std::string vars() override {
+        return VARS_TO_STR3(type, ne, eps);
+    }
+
+    test_rms_norm_back(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {64, 5, 4, 3},
+            float eps = 1e-6f)
+        : type(type), ne(ne), eps(eps) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(a, "a");
+
+        ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(b, "b");
+
+        ggml_tensor * out = ggml_rms_norm_back(ctx, a, b, eps);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.f, 10.f);
+        }
+    }
+};
+
 // GGML_OP_SSM_CONV
 struct test_ssm_conv : public test_case {
     const ggml_type type;
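For reference (not part of the diff): with r = sqrt(mean(x^2) + eps), rms_norm computes y = x / r per row, and differentiating gives dx = (dy - x * <dy, x> / (n * r^2)) / r. A per-row sketch of the backward pass the new test covers, assuming F32 rows of length n (the helper name is illustrative):

    #include <cmath>

    // Per-row reference for rms_norm_back: given input x and upstream
    // gradient dy for y = x / sqrt(mean(x^2) + eps), compute dx.
    void rms_norm_back_ref(const float * x, const float * dy, float * dx,
                           int n, float eps) {
        float sum_xx = 0.0f, sum_xdy = 0.0f;
        for (int i = 0; i < n; i++) {
            sum_xx  += x[i] * x[i];
            sum_xdy += x[i] * dy[i];
        }
        const float r2 = sum_xx / n + eps; // mean(x^2) + eps
        const float r  = std::sqrt(r2);
        for (int i = 0; i < n; i++) {
            dx[i] = (dy[i] - x[i] * sum_xdy / (n * r2)) / r;
        }
    }

The uniform init in [-10, 10] keeps the rows well away from the all-zero case, where the eps = 0.0f configurations added below would otherwise divide by zero.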
@@ -1855,10 +1986,11 @@ struct test_out_prod : public test_case {
     const int64_t n;
     const int64_t k;
     const std::array<int64_t, 2> bs; // dims 3 and 4
+    const std::array<int64_t, 2> nr; // repeat in dims 3 and 4
     const bool trans_b;
 
     std::string vars() override {
-        return VARS_TO_STR7(type_a, type_b, m, n, k, bs, trans_b);
+        return VARS_TO_STR8(type_a, type_b, m, n, k, bs, nr, trans_b);
     }
 
     double max_nmse_err() override {
@@ -1868,8 +2000,9 @@ struct test_out_prod : public test_case {
     test_out_prod(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
             int64_t m = 32, int64_t n = 32, int64_t k = 32,
             std::array<int64_t, 2> bs = {10, 10},
+            std::array<int64_t, 2> nr = {2, 2},
             bool trans_b = false)
-        : type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), trans_b(trans_b) {}
+        : type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), nr(nr), trans_b(trans_b) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor_4d(ctx, type_a, m, k, bs[0], bs[1]);
@@ -1877,10 +2010,10 @@ struct test_out_prod : public test_case {
 
         ggml_tensor * b;
         if (trans_b) {
-            b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0], bs[1]);
+            b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
            b = ggml_transpose(ctx, b);
         } else {
-            b = ggml_new_tensor_4d(ctx, type_b, n, k, bs[0], bs[1]);
+            b = ggml_new_tensor_4d(ctx, type_b, n, k, bs[0]*nr[0], bs[1]*nr[1]);
         }
         ggml_set_name(b, "b");
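For context (not part of the diff): ggml_out_prod(a, b) with a of shape [m, k] and b of shape [n, k] produces an [m, n] result out[i, j] = sum_c a[i, c] * b[j, c], i.e. a * b^T. The new nr parameter gives b nr[0]*nr[1] times more batches than a, so each batch of a is broadcast across several batches of b (which is how it is used in the mul_mat backward pass). A single-batch sketch, assuming F32 and ggml's layout where dim 0 is contiguous (the helper name is illustrative):

    // Single-batch reference for out_prod: out = a * b^T, accumulating
    // outer products over the shared k dimension.
    void out_prod_ref(const float * a, const float * b, float * out,
                      int64_t m, int64_t n, int64_t k) {
        for (int64_t j = 0; j < n; j++) {
            for (int64_t i = 0; i < m; i++) {
                float sum = 0.0f;
                for (int64_t c = 0; c < k; c++) {
                    sum += a[c * m + i] * b[c * n + j];
                }
                out[j * m + i] = sum;
            }
        }
    }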
@@ -2191,6 +2324,36 @@ struct test_soft_max : public test_case {
     }
 };
 
+// GGML_OP_SOFT_MAX_BACK
+struct test_soft_max_back : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+    const float scale;
+    const float max_bias;
+
+    std::string vars() override {
+        return VARS_TO_STR4(type, ne, scale, max_bias);
+    }
+
+    test_soft_max_back(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3},
+            float scale = 1.0f,
+            float max_bias = 0.0f)
+        : type(type), ne(ne), scale(scale), max_bias(max_bias) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(a, "a");
+
+        ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(b, "b"); // fixed: was a duplicated ggml_set_name(a, "a")
+
+        ggml_tensor * out = ggml_soft_max_ext_back(ctx, a, b, scale, max_bias);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+};
+
 // GGML_OP_ROPE + GGML_OP_ROPE_BACK
 struct test_rope : public test_case {
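For reference (not part of the diff): if y = softmax(scale * x), the Jacobian-vector product for one row collapses to dx = scale * y .* (dy - <dy, y>). A per-row sketch ignoring max_bias (the ALiBi slope term), and assuming one input carries the upstream gradient dy and the other the forward softmax output y (treat the exact argument order of ggml_soft_max_ext_back as an assumption here):

    // Per-row reference for soft_max_back with max_bias == 0:
    // dx = scale * y * (dy - dot(dy, y)).
    void soft_max_back_ref(const float * dy, const float * y, float * dx,
                           int n, float scale) {
        float dot = 0.0f;
        for (int i = 0; i < n; i++) {
            dot += dy[i] * y[i];
        }
        for (int i = 0; i < n; i++) {
            dx[i] = scale * y[i] * (dy[i] - dot);
        }
    }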
@@ -2980,6 +3143,40 @@ struct test_cross_entropy_loss : public test_case {
     }
 };
 
+// GGML_OP_CROSS_ENTROPY_LOSS_BACK
+struct test_cross_entropy_loss_back : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_cross_entropy_loss_back(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * grad = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
+        ggml_set_name(grad, "grad");
+
+        ggml_tensor * logits = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(logits, "logits");
+
+        ggml_tensor * labels = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(labels, "labels");
+
+        // Ensure labels add up to 1:
+        labels = ggml_soft_max(ctx, labels);
+        ggml_set_name(labels, "labels_normalized");
+
+        ggml_tensor * out = ggml_cross_entropy_loss_back(ctx, grad, logits, labels);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+};
+
 // GGML_OP_OPT_STEP_ADAMW
 struct test_opt_step_adamw : public test_case {
    const ggml_type type;
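For reference (not part of the diff): for mean cross-entropy over softmax logits, the gradient with respect to the logits is softmax(logits) - labels, scaled by the scalar upstream loss gradient and the row count used for the mean. A per-row sketch, assuming F32 (the helper name is illustrative, and ggml's exact normalization may differ):

    #include <cmath>

    // Per-row reference: dlogits = d_loss * (softmax(logits) - labels) / nrows.
    void cross_entropy_loss_back_ref(float d_loss, const float * logits,
                                     const float * labels, float * dlogits,
                                     int n, int nrows) {
        float max_l = logits[0];
        for (int i = 1; i < n; i++) {
            max_l = std::fmax(max_l, logits[i]); // subtract max for stability
        }
        float sum = 0.0f;
        for (int i = 0; i < n; i++) {
            dlogits[i] = std::exp(logits[i] - max_l);
            sum += dlogits[i];
        }
        for (int i = 0; i < n; i++) {
            dlogits[i] = d_loss * (dlogits[i] / sum - labels[i]) / nrows;
        }
    }

Passing the labels through ggml_soft_max in build_graph guarantees they form a valid distribution, which this formula assumes.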
@@ -3479,6 +3676,16 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         }
     }
 
+    test_cases.emplace_back(new test_get_rows_back(GGML_TYPE_F32, 1, 8, 2, 1, false));
+    for (ggml_type type : all_types) {
+        for (bool v : {false, true}) {
+            test_cases.emplace_back(new test_get_rows_back(type, 256, 5, 4, 1, v));
+        }
+    }
+    for (bool v : {false, true}) {
+        test_cases.emplace_back(new test_get_rows_back(GGML_TYPE_I32, 256, 5, 4, 1, v));
+    }
+
     for (ggml_type type_input : {GGML_TYPE_F32}) {
         for (ggml_op_pool pool_type : {GGML_OP_POOL_AVG, GGML_OP_POOL_MAX}) {
             for (int k0 : {1, 3}) {
@@ -3657,10 +3864,12 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 
     test_cases.emplace_back(new test_add1());
     test_cases.emplace_back(new test_scale());
+    test_cases.emplace_back(new test_silu_back());
 
-    for (float eps : {1e-6f, 1e-5f, 1e-3f, 1e-1f}) {
-        test_cases.emplace_back(new test_norm(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
-        test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
+    for (float eps : {0.0f, 1e-7f, 1e-4f, 1e-1f}) {
+        test_cases.emplace_back(new test_norm         (GGML_TYPE_F32, {64, 5, 4, 3}, eps));
+        test_cases.emplace_back(new test_rms_norm     (GGML_TYPE_F32, {64, 5, 4, 3}, eps));
+        test_cases.emplace_back(new test_rms_norm_back(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
     }
 
     test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 1536, 1, 1}, {4, 1536, 1, 1}));
@@ -3800,22 +4009,19 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 
     for (ggml_type type_a : base_types) {
         for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, { 1, 1}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 1}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 1}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
-
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, { 1, 1}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, { 1, 1}, true));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 1}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 1}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
-            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
+            for (int n : {1, 16}) {
+                for (int k : {1, 16}) {
+                    for (int bs2 : {1, 3}) {
+                        for (int bs3 : {1, 3}) {
+                            for (int nr2 : {1, 2}) {
+                                for (int nr3 : {1, 2}) {
+                                    test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, n, k, {bs2, bs3}, {nr2, nr3}));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
         }
     }
 
@@ -3858,11 +4064,22 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
             }
         }
     }
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, 0.1f, 0.0f));
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, 0.1f, 0.0f));
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 0.0f));
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 8.0f));
 
+    for (float max_bias : {0.0f, 8.0f}) {
+        for (float scale : {1.0f, 0.1f}) {
+            for (int64_t ne0 : {16, 1024}) {
+                for (int64_t ne1 : {16, 1024}) {
+                    test_cases.emplace_back(new test_soft_max_back(GGML_TYPE_F32, {ne0,   ne1,   1, 1}, scale, max_bias));
+                    test_cases.emplace_back(new test_soft_max_back(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, scale, max_bias));
+                }
+            }
+        }
+    }
+
     for (bool fw : {true, false}) { // fw == forward
         bool all = true;
 
@@ -3953,7 +4170,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         }
     }
 
-    test_cases.emplace_back(new test_cross_entropy_loss());
+    test_cases.emplace_back(new test_cross_entropy_loss     (GGML_TYPE_F32, {   10, 5, 4, 3}));
+    test_cases.emplace_back(new test_cross_entropy_loss     (GGML_TYPE_F32, {30000, 1, 1, 1}));
+    test_cases.emplace_back(new test_cross_entropy_loss_back(GGML_TYPE_F32, {   10, 5, 4, 3}));
+    test_cases.emplace_back(new test_cross_entropy_loss_back(GGML_TYPE_F32, {30000, 1, 1, 1}));
 
     test_cases.emplace_back(new test_opt_step_adamw(GGML_TYPE_F32, {10, 5, 4, 3}));
 
     // these tests are disabled to save execution time, but they can be handy for debugging
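As a usage note (not part of the diff): test-backend-ops can run a single op on a single backend, so the new cases can be exercised in isolation with an invocation along the lines of "test-backend-ops test -o SOFT_MAX_BACK -b CUDA0", with the grad mode covering the numeric gradient checks. The exact mode and flag names should be confirmed against the binary's help output; treat this command line as an assumption.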