From 2cf4f37e36ad6f48a585d970d67c32483a16ed05 Mon Sep 17 00:00:00 2001
From: FSSRepo
Date: Fri, 29 Dec 2023 10:32:40 -0500
Subject: [PATCH] add metal backend

---
 examples/llava/CMakeLists.txt |  3 +++
 examples/llava/clip.cpp       | 23 +++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/examples/llava/CMakeLists.txt b/examples/llava/CMakeLists.txt
index 0bfeac056..69c6be3df 100644
--- a/examples/llava/CMakeLists.txt
+++ b/examples/llava/CMakeLists.txt
@@ -38,3 +38,6 @@ target_compile_features(llava PRIVATE cxx_std_11)
 if(LLAMA_CUBLAS)
   add_definitions(-DCLIP_USE_CUBLAS)
 endif()
+if(LLAMA_METAL)
+  add_definitions(-DCLIP_USE_METAL)
+endif()
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 1bcb7aed7..2ee14d2e5 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -22,6 +22,10 @@
 #include "ggml-cuda.h"
 #endif
 
+#ifdef CLIP_USE_METAL
+#include "ggml-metal.h"
+#endif
+
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 
@@ -512,6 +516,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     printf("CLIP using CUDA backend\n");
 #endif
 
+#ifdef CLIP_USE_METAL
+    new_clip->backend = ggml_backend_metal_init();
+    printf("CLIP using Metal backend\n");
+#endif
+
     if(!new_clip->backend) {
         new_clip->backend = ggml_backend_cpu_init();
         printf("CLIP using CPU backend\n");
@@ -594,7 +603,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
             return nullptr;
         }
         int num_bytes = ggml_nbytes(cur);
-        if (ggml_backend_is_cpu(new_clip->backend)) {
+        if (ggml_backend_is_cpu(new_clip->backend)
+#ifdef CLIP_USE_METAL
+            || ggml_backend_is_metal(new_clip->backend)
+#endif
+            ) {
             // for the CPU and Metal backend, we can read directly into the tensor
             fin.read(reinterpret_cast<char *>(cur->data), num_bytes);
         } else {
@@ -882,7 +895,13 @@ bool clip_image_batch_encode(const clip_ctx * ctx, const int n_threads, const cl
         ggml_backend_cpu_set_n_threads(ctx->backend, n_threads);
     }
 
-    ggml_backend_graph_compute(ctx->backend, gf);
+#ifdef CLIP_USE_METAL
+    if (ggml_backend_is_metal(ctx->backend)) {
+        ggml_backend_metal_set_n_cb(ctx->backend, n_threads);
+    }
+#endif
+
+    ggml_backend_graph_compute(ctx->backend, gf);
 
     // the last node is the embedding tensor
     struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 1];