From 2cf4f37e36ad6f48a585d970d67c32483a16ed05 Mon Sep 17 00:00:00 2001
From: FSSRepo
Date: Fri, 29 Dec 2023 10:32:40 -0500
Subject: [PATCH] add metal backend

---
 examples/llava/CMakeLists.txt |  3 +++
 examples/llava/clip.cpp       | 23 +++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/examples/llava/CMakeLists.txt b/examples/llava/CMakeLists.txt
index 0bfeac056..69c6be3df 100644
--- a/examples/llava/CMakeLists.txt
+++ b/examples/llava/CMakeLists.txt
@@ -38,3 +38,6 @@ target_compile_features(llava PRIVATE cxx_std_11)
 if(LLAMA_CUBLAS)
   add_definitions(-DCLIP_USE_CUBLAS)
 endif()
+if(LLAMA_METAL)
+  add_definitions(-DCLIP_USE_METAL)
+endif()
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 1bcb7aed7..2ee14d2e5 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -22,6 +22,10 @@
 #include "ggml-cuda.h"
 #endif
 
+#ifdef CLIP_USE_METAL
+#include "ggml-metal.h"
+#endif
+
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 
@@ -512,6 +516,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     printf("CLIP using CUDA backend\n");
 #endif
 
+#ifdef CLIP_USE_METAL
+    new_clip->backend = ggml_backend_metal_init();
+    printf("CLIP using Metal backend\n");
+#endif
+
     if(!new_clip->backend) {
         new_clip->backend = ggml_backend_cpu_init();
         printf("CLIP using CPU backend\n");
@@ -594,7 +603,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
             return nullptr;
         }
         int num_bytes = ggml_nbytes(cur);
-        if (ggml_backend_is_cpu(new_clip->backend)) {
+        if (ggml_backend_is_cpu(new_clip->backend)
+#ifdef CLIP_USE_METAL
+            || ggml_backend_is_metal(new_clip->backend)
+#endif
+            ) {
             // for the CPU and Metal backend, we can read directly into the tensor
             fin.read(reinterpret_cast<char *>(cur->data), num_bytes);
         } else {
@@ -882,7 +895,13 @@ bool clip_image_batch_encode(const clip_ctx * ctx, const int n_threads, const cl
         ggml_backend_cpu_set_n_threads(ctx->backend, n_threads);
     }
 
-    ggml_backend_graph_compute(ctx->backend, gf);
+#ifdef CLIP_USE_METAL
+    if (ggml_backend_is_metal(ctx->backend)) {
+        ggml_backend_metal_set_n_cb(ctx->backend, n_threads);
+    }
+#endif
+
+    ggml_backend_graph_compute(ctx->backend, gf);
 
     // the last node is the embedding tensor
     struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 1];