From 6af02b19d128da1d73085851a6f1f6d6d92a9014 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Fri, 26 Jan 2024 14:42:11 -0500
Subject: [PATCH] kompute : init device automatically and remove an unnecessary free

---
Reviewer notes (ignored by `git am`, not part of the commit message):

  * examples/main/main.cpp: drops the GGML_USE_KOMPUTE-guarded call to
    ggml_vk_init_device(0, "gpu") that ran before the model was loaded.
  * ggml-kompute.cpp: ggml_backend_kompute_init() now calls
    ggml_vk_init_device(0, "gpu") itself when ggml_vk_has_device() is false;
    the existing "device was not initialized" error path is kept as the
    fallback. ggml_backend_reg_kompute_init() therefore no longer calls
    ggml_vk_init_device() directly. ggml_backend_kompute_free() only gains a
    TODO comment above its existing ggml_vk_free_device() call.
  * llama.cpp: llama_free() no longer calls ggml_vk_free_device().

 examples/main/main.cpp | 4 ----
 ggml-kompute.cpp       | 9 ++++++++-
 llama.cpp              | 3 ---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 4367cf20c..ef80b5012 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -186,10 +186,6 @@ int main(int argc, char ** argv) {
     g_model = &model;
     g_ctx = &ctx;
 
-#if defined(GGML_USE_KOMPUTE)
-    ggml_vk_init_device(0, "gpu");
-#endif
-
     // load the model and apply lora adapter, if any
     LOG("%s: load the model and apply lora adapter, if any\n", __func__);
     std::tie(model, ctx) = llama_init_from_gpt_params(params);
diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp
index 31c6f0d90..955e7f077 100644
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@@ -1837,6 +1837,8 @@ static const char * ggml_backend_kompute_name(ggml_backend_t backend) {
 static void ggml_backend_kompute_free(ggml_backend_t backend) {
     struct ggml_kompute_context * ctx = (struct ggml_kompute_context *)backend->context;
     ggml_vk_free(ctx);
+    // TODO(cebtenzzre): This should only be done if the device was initialized by us, but
+    // that would require a change to GPT4All.
     ggml_vk_free_device();
     delete backend;
 }
@@ -1873,6 +1875,12 @@ static struct ggml_backend_i kompute_backend_i = {
 };
 
 ggml_backend_t ggml_backend_kompute_init() {
+#if defined(GGML_USE_KOMPUTE)
+    if (!ggml_vk_has_device()) {
+        ggml_vk_init_device(0, "gpu");
+    }
+#endif
+
     if (!ggml_vk_has_device()) {
         fprintf(stderr, "%s: error: device was not initialized\n", __func__);
         return nullptr;
@@ -1897,6 +1905,5 @@ extern "C" ggml_backend_t ggml_backend_reg_kompute_init(const char * params, voi
 ggml_backend_t ggml_backend_reg_kompute_init(const char * params, void * user_data) {
     GGML_UNUSED(params);
     GGML_UNUSED(user_data);
-    ggml_vk_init_device(0, "gpu");
     return ggml_backend_kompute_init();
 }
diff --git a/llama.cpp b/llama.cpp
index 95ec257d5..0da73628b 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -10162,9 +10162,6 @@ struct llama_context * llama_new_context_with_model(
 
 void llama_free(struct llama_context * ctx) {
     delete ctx;
-#ifdef GGML_USE_KOMPUTE
-    ggml_vk_free_device();
-#endif
 }
 
 const llama_model * llama_get_model(const struct llama_context * ctx) {