From 6af02b19d128da1d73085851a6f1f6d6d92a9014 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel <jared@nomic.ai>
Date: Fri, 26 Jan 2024 14:42:11 -0500
Subject: [PATCH] kompute : init device automatically and remove an unnecessary
 free

---
 examples/main/main.cpp | 4 ----
 ggml-kompute.cpp       | 9 ++++++++-
 llama.cpp              | 3 ---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 4367cf20c..ef80b5012 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -186,10 +186,6 @@ int main(int argc, char ** argv) {
     g_model = &model;
     g_ctx = &ctx;
 
-#if defined(GGML_USE_KOMPUTE)
-    ggml_vk_init_device(0, "gpu");
-#endif
-
     // load the model and apply lora adapter, if any
     LOG("%s: load the model and apply lora adapter, if any\n", __func__);
     std::tie(model, ctx) = llama_init_from_gpt_params(params);
diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp
index 31c6f0d90..955e7f077 100644
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@@ -1837,6 +1837,8 @@ static const char * ggml_backend_kompute_name(ggml_backend_t backend) {
 static void ggml_backend_kompute_free(ggml_backend_t backend) {
     struct ggml_kompute_context * ctx = (struct ggml_kompute_context *)backend->context;
     ggml_vk_free(ctx);
+    // TODO(cebtenzzre): This should only be done if ggml_backend_kompute_init() initialized
+    //                   the device itself, but that would require a change to GPT4All.
     ggml_vk_free_device();
     delete backend;
 }
@@ -1873,6 +1875,12 @@ static struct ggml_backend_i kompute_backend_i = {
 };
 
 ggml_backend_t ggml_backend_kompute_init() {
+#if defined(GGML_USE_KOMPUTE)
+    if (!ggml_vk_has_device()) {
+        ggml_vk_init_device(0, "gpu");
+    }
+#endif
+
     if (!ggml_vk_has_device()) {
         fprintf(stderr, "%s: error: device was not initialized\n", __func__);
         return nullptr;
@@ -1897,6 +1905,5 @@ extern "C" ggml_backend_t ggml_backend_reg_kompute_init(const char * params, voi
 ggml_backend_t ggml_backend_reg_kompute_init(const char * params, void * user_data) {
     GGML_UNUSED(params);
     GGML_UNUSED(user_data);
-    ggml_vk_init_device(0, "gpu");
     return ggml_backend_kompute_init();
 }
diff --git a/llama.cpp b/llama.cpp
index 95ec257d5..0da73628b 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -10162,9 +10162,6 @@ struct llama_context * llama_new_context_with_model(
 
 void llama_free(struct llama_context * ctx) {
     delete ctx;
-#ifdef GGML_USE_KOMPUTE
-    ggml_vk_free_device();
-#endif
 }
 
 const llama_model * llama_get_model(const struct llama_context * ctx) {