kompute : use llama_backend_init/llama_backend_free to manage device

2024-01-27 11:55:32 -05:00 · 2024-01-27 11:55:32 -05:00 · 530462550d
commit 530462550d
parent 050d450297
2 changed files with 11 additions and 9 deletions
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@ -1863,9 +1863,6 @@ static const char * ggml_backend_kompute_name(ggml_backend_t backend) {
 // Tears down a Kompute backend instance.
 // Reads the ggml_kompute_context pointer from backend->context and hands it to
 // ggml_vk_free(), then calls ggml_vk_free_device(), and finally deletes the
 // backend object itself. The context pointer is fetched before `delete backend`,
 // so `backend` is not touched after it has been destroyed.
 static void ggml_backend_kompute_free(ggml_backend_t backend) {
    struct ggml_kompute_context * ctx = (struct ggml_kompute_context *)backend->context;
    ggml_vk_free(ctx);
    // TODO(cebtenzzre): This should only be done if the device was initialized by us, but
    //                   that would require a change to GPT4All.
    ggml_vk_free_device();
    delete backend;
 }
@ -1901,12 +1898,6 @@ static struct ggml_backend_i kompute_backend_i = {
 };
 ggml_backend_t ggml_backend_kompute_init() {
 #if defined(GGML_USE_KOMPUTE)
    if (!ggml_vk_has_device()) {
        ggml_vk_init_device(0, "gpu");
    }
 #endif
    if (!ggml_vk_has_device()) {
        fprintf(stderr, "%s: error: device was not initialized\n", __func__);
        return nullptr;
--- a/llama.cpp
+++ b/llama.cpp
@ -9860,6 +9860,13 @@ void llama_backend_init(bool numa) {
 #ifdef GGML_USE_MPI
    ggml_mpi_backend_init();
 #endif
 #ifdef GGML_USE_KOMPUTE
    if (!ggml_vk_has_device()) {
        ggml_vk_init_device(0, "gpu");
    }
 #endif
 }
 void llama_backend_free(void) {
@ -9867,6 +9874,10 @@ void llama_backend_free(void) {
    ggml_mpi_backend_free();
 #endif
    ggml_quantize_free();
 #ifdef GGML_USE_KOMPUTE
    ggml_vk_free_device();
 #endif
 }
 int64_t llama_time_us(void) {