From 530462550d611df9defd83f9c73d031a077e58c6 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Sat, 27 Jan 2024 11:55:32 -0500
Subject: [PATCH] kompute : use llama_backend_init/llama_backend_free to manage
 device

---
 ggml-kompute.cpp |  9 ---------
 llama.cpp        | 11 +++++++++++
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp
index df01d6196..f6bba6838 100644
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@@ -1863,9 +1863,6 @@ static const char * ggml_backend_kompute_name(ggml_backend_t backend) {
 static void ggml_backend_kompute_free(ggml_backend_t backend) {
     struct ggml_kompute_context * ctx = (struct ggml_kompute_context *)backend->context;
     ggml_vk_free(ctx);
-    // TODO(cebtenzzre): This should only be done if the device was initialized by us, but
-    // that would require a change to GPT4All.
-    ggml_vk_free_device();
     delete backend;
 }
 
@@ -1901,12 +1898,6 @@ static struct ggml_backend_i kompute_backend_i = {
 };
 
 ggml_backend_t ggml_backend_kompute_init() {
-#if defined(GGML_USE_KOMPUTE)
-    if (!ggml_vk_has_device()) {
-        ggml_vk_init_device(0, "gpu");
-    }
-#endif
-
     if (!ggml_vk_has_device()) {
         fprintf(stderr, "%s: error: device was not initialized\n", __func__);
         return nullptr;
diff --git a/llama.cpp b/llama.cpp
index 0da73628b..b97d4d960 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -9860,6 +9860,13 @@ void llama_backend_init(bool numa) {
 #ifdef GGML_USE_MPI
     ggml_mpi_backend_init();
 #endif
+
+#ifdef GGML_USE_KOMPUTE
+    if (!ggml_vk_has_device()) {
+        ggml_vk_init_device(0, "gpu");
+    }
+#endif
+
 }
 
 void llama_backend_free(void) {
@@ -9867,6 +9874,10 @@ void llama_backend_free(void) {
 #ifdef GGML_USE_MPI
     ggml_mpi_backend_free();
 #endif
     ggml_quantize_free();
+
+#ifdef GGML_USE_KOMPUTE
+    ggml_vk_free_device();
+#endif
 }
 
 int64_t llama_time_us(void) {