sync : ggml (backend v2) (#3912)

* sync : ggml (backend v2) (wip)
* sync : migrate examples and llama.cpp to dynamic graphs (wip)
* sync : update tests + fix max op params to 64 ggml-ci
* sync : ggml-cuda ggml-ci
* llama : fix save/load state context size ggml-ci
* sync : try to fix build on tvOS
* sync : pass custom graph sizes in training examples
* sync : update graph copies to new ggml API
* sync : update sync-ggml.sh with new files
* scripts : fix header in sync script
* train : fix context size calculations
* llama : increase inference graph size up to 4096 nodes
* train : allocate grads for backward graphs
* train : allocate grads for gb_tmp
2023-11-13 14:16:23 +02:00 · 2023-11-13 14:16:23 +02:00 · 4760e7cc0b
commit 4760e7cc0b
parent bb50a792ec
22 changed files with 1994 additions and 864 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1,5 +1,6 @@
 #import "ggml-metal.h"

+#import "ggml-backend-impl.h"
 #import "ggml.h"

 #import <Foundation/Foundation.h>
@@ -23,7 +24,7 @@

 #define UNUSED(x) (void)(x)

-#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)
+#define GGML_MAX_CONCUR (2*GGML_DEFAULT_GRAPH_SIZE)

 struct ggml_metal_buffer {
    const char * name;
@@ -744,6 +745,20 @@ void ggml_metal_graph_compute(
                struct ggml_tensor * src1 = gf->nodes[i]->src[1];
                struct ggml_tensor * dst  = gf->nodes[i];

+                switch (dst->op) {
+                    case GGML_OP_NONE:
+                    case GGML_OP_RESHAPE:
+                    case GGML_OP_VIEW:
+                    case GGML_OP_TRANSPOSE:
+                    case GGML_OP_PERMUTE:
+                        {
+                            // noop -> next node
+                        } continue;
+                    default:
+                        {
+                        } break;
+                }
+
                const int64_t  ne00 = src0 ? src0->ne[0] : 0;
                const int64_t  ne01 = src0 ? src0->ne[1] : 0;
                const int64_t  ne02 = src0 ? src0->ne[2] : 0;
@@ -797,14 +812,6 @@ void ggml_metal_graph_compute(
                //}

                switch (dst->op) {
-                    case GGML_OP_NONE:
-                    case GGML_OP_RESHAPE:
-                    case GGML_OP_VIEW:
-                    case GGML_OP_TRANSPOSE:
-                    case GGML_OP_PERMUTE:
-                        {
-                            // noop
-                        } break;
                    case GGML_OP_CONCAT:
                        {
                            const int64_t nb = ne00;