kompute : init device automatically and remove an unnecessary free

This commit is contained in:
Jared Van Bortel 2024-01-26 14:42:11 -05:00
parent 8ca33dec7d
commit 6af02b19d1
3 changed files with 8 additions and 8 deletions

View file

@@ -186,10 +186,6 @@ int main(int argc, char ** argv) {
g_model = &model;
g_ctx = &ctx;
#if defined(GGML_USE_KOMPUTE)
ggml_vk_init_device(0, "gpu");
#endif
// load the model and apply lora adapter, if any
LOG("%s: load the model and apply lora adapter, if any\n", __func__);
std::tie(model, ctx) = llama_init_from_gpt_params(params);

View file

@@ -1837,6 +1837,8 @@ static const char * ggml_backend_kompute_name(ggml_backend_t backend) {
static void ggml_backend_kompute_free(ggml_backend_t backend) {
struct ggml_kompute_context * ctx = (struct ggml_kompute_context *)backend->context;
ggml_vk_free(ctx);
// TODO(cebtenzzre): This should only be done if the device was initialized by us, but
// that would require a change to GPT4All.
ggml_vk_free_device();
delete backend;
}
@@ -1873,6 +1875,12 @@ static struct ggml_backend_i kompute_backend_i = {
};
ggml_backend_t ggml_backend_kompute_init() {
#if defined(GGML_USE_KOMPUTE)
if (!ggml_vk_has_device()) {
ggml_vk_init_device(0, "gpu");
}
#endif
if (!ggml_vk_has_device()) {
fprintf(stderr, "%s: error: device was not initialized\n", __func__);
return nullptr;
@@ -1897,6 +1905,5 @@ extern "C" ggml_backend_t ggml_backend_reg_kompute_init(const char * params, void * user_data) {
ggml_backend_t ggml_backend_reg_kompute_init(const char * params, void * user_data) {
GGML_UNUSED(params);
GGML_UNUSED(user_data);
ggml_vk_init_device(0, "gpu");
return ggml_backend_kompute_init();
}

View file

@@ -10162,9 +10162,6 @@ struct llama_context * llama_new_context_with_model(
void llama_free(struct llama_context * ctx) {
delete ctx;
#ifdef GGML_USE_KOMPUTE
ggml_vk_free_device();
#endif
}
const llama_model * llama_get_model(const struct llama_context * ctx) {