cuda : switch to F16 scalars + tune warps for RTX 2060

commit 9a5c2a1681
parent 2c04beeb81
Author: Georgi Gerganov
Date:   2024-02-01 15:00:47 +02:00
2 changed files with 61 additions and 47 deletions

tests/test-backend-ops.cpp

@@ -572,9 +572,19 @@ struct test_case {
         // duplicate the op
         size_t target_size = ggml_backend_is_cpu(backend) ? 1ULL << 33 : 1ULL << 35; // 8 GB CPU, 32 GB GPU
         int n_runs = std::min((size_t)gf->size - gf->n_nodes, target_size / op_size(out)) + 1;
+#if 1
         for (int i = 1; i < n_runs; i++) {
             gf->nodes[gf->n_nodes++] = out;
         }
+#else
+        n_runs = 1000;
+        int n_nodes = gf->n_nodes;
+        for (int i = 1; i < n_runs; i++) {
+            for (int j = 0; j < n_nodes; j++) {
+                gf->nodes[gf->n_nodes++] = gf->nodes[j];
+            }
+        }
+#endif
 
         // calculate memory
         size_t mem = n_runs * op_size(out);
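
Note: the new #else branch replays the entire graph 1000 times instead of duplicating only the measured op, which also captures per-graph launch overhead. To illustrate how the default #if 1 path picks its repeat count, here is a minimal standalone sketch; graph_capacity, graph_nodes, and the sample op sizes are assumed stand-ins, not values from the commit:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    int main() {
        // Mirrors: n_runs = std::min(gf->size - gf->n_nodes, target_size / op_size(out)) + 1
        const size_t target_size    = 1ULL << 35; // 32 GB traffic budget on GPU (1ULL << 33 on CPU)
        const size_t graph_capacity = 8192;       // assumed stand-in for gf->size
        const size_t graph_nodes    = 1;          // assumed stand-in for gf->n_nodes
        for (size_t op_bytes : {1ULL << 20, 1ULL << 24, 1ULL << 28}) { // 1 MiB, 16 MiB, 256 MiB ops
            const size_t n_runs = std::min(graph_capacity - graph_nodes, target_size / op_bytes) + 1;
            printf("op = %3zu MiB -> n_runs = %zu\n", op_bytes >> 20, n_runs);
        }
        return 0;
    }

Small ops get duplicated thousands of times so kernel launch overhead is amortized, while the std::min against the remaining graph capacity keeps the node array in bounds.
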
@@ -2199,8 +2209,8 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_pad());
     test_cases.emplace_back(new test_leaky_relu());
 
-#if 0
-    for (int hs : { 64, 80, 96, 112, 128, 256, }) {
+#if 1
+    for (int hs : { 64, 80, 128, }) {
     for (int nh : { 32, }) {
     for (int kv : { 512, 1024, 2048, 4096, }) {
     for (int nb : { 1, 2, 4, 8, 512, 1024, 2048, }) {
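
Flipping #if 0 to #if 1 enables this loop nest (judging by the parameter names, presumably attention test cases swept over head size, head count, KV length, and batch size; the loop body lies outside this hunk), and the head-size list is trimmed from six values to three. A quick sketch of the resulting grid size, assuming each innermost iteration emplaces exactly one test case:

    #include <cstdio>
    #include <vector>

    int main() {
        const std::vector<int> hs = {64, 80, 128};                 // head sizes after this commit
        const std::vector<int> nh = {32};                          // head counts
        const std::vector<int> kv = {512, 1024, 2048, 4096};       // KV lengths
        const std::vector<int> nb = {1, 2, 4, 8, 512, 1024, 2048}; // batch sizes
        const size_t n_cases = hs.size() * nh.size() * kv.size() * nb.size();
        printf("enabled grid: %zu cases (vs %zu with the previous six head sizes)\n",
               n_cases, (size_t)6 * nh.size() * kv.size() * nb.size());
        return 0;
    }

That is 3 * 1 * 4 * 7 = 84 combinations, versus 168 had the old six-value head-size list been enabled, which keeps the test sweep shorter while it is switched on by default.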