metal : adapting to ggml_backend (WIP)

2023-07-18 16:54:41 +03:00 · 2023-07-18 16:54:41 +03:00 · 0a3861c47b
commit 0a3861c47b
parent 1102ff56db
3 changed files with 99 additions and 41 deletions
--- a/ggml-metal.h
+++ b/ggml-metal.h
@@ -19,51 +19,56 @@

 #pragma once

+#include "ggml.h"
+
 #include <stddef.h>
 #include <stdbool.h>

 // max memory buffers that can be mapped to the device
 #define GGML_METAL_MAX_BUFFERS 16

-struct ggml_tensor;
-struct ggml_cgraph;
+//struct ggml_tensor;
+//struct ggml_cgraph;

 #ifdef __cplusplus
 extern "C" {
 #endif

-struct ggml_metal_context;
+// GG: maybe return ptr and avoid the "ggml.h" include (a by-value return needs the full struct definition at call sites)
+struct ggml_backend ggml_backend_metal_init(void);

-// number of command buffers to use
-struct ggml_metal_context * ggml_metal_init(int n_cb);
-void ggml_metal_free(struct ggml_metal_context * ctx);
-
-// set the number of command buffers to use
-void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
-
-// creates a mapping between a host memory buffer and a device memory buffer
-// - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
-// - the mapping is used during computation to determine the arguments of the compute kernels
-// - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
-// - max_size specifies the maximum size of a tensor and is used to create shared views such
-//   that it is guaranteed that the tensor will fit in at least one of the views
+//struct ggml_metal_context;
 //
-bool ggml_metal_add_buffer(
-        struct ggml_metal_context * ctx,
-                       const char * name,
-                             void * data,
-                           size_t   size,
-                           size_t   max_size);
-
-// set data from host memory into the device
-void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
-
-// get data from the device into host memory
-void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
-
-// same as ggml_graph_compute but uses Metal
-// creates gf->n_threads command buffers in parallel
-void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
+//// number of command buffers to use
+//struct ggml_metal_context * ggml_metal_init(int n_cb);
+//void ggml_metal_free(struct ggml_metal_context * ctx);
+//
+//// set the number of command buffers to use
+//void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
+//
+//// creates a mapping between a host memory buffer and a device memory buffer
+//// - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
+//// - the mapping is used during computation to determine the arguments of the compute kernels
+//// - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
+//// - max_size specifies the maximum size of a tensor and is used to create shared views such
+////   that it is guaranteed that the tensor will fit in at least one of the views
+////
+//bool ggml_metal_add_buffer(
+//        struct ggml_metal_context * ctx,
+//                       const char * name,
+//                             void * data,
+//                           size_t   size,
+//                           size_t   max_size);
+//
+//// set data from host memory into the device
+//void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
+//
+//// get data from the device into host memory
+//void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
+//
+//// same as ggml_graph_compute but uses Metal
+//// creates gf->n_threads command buffers in parallel
+//void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);

 #ifdef __cplusplus
 }