llama : abort ctx if cuda backend init fails

commit 4813e17548 (parent 5a62db30c3)
Author: slaren
Date:   2024-01-08 14:00:34 +01:00

@@ -9199,10 +9199,12 @@ struct llama_context * llama_new_context_with_model(
 #elif defined(GGML_USE_CUBLAS)
         if (model->n_gpu_layers > 0) {
             // with split_mode LLAMA_SPLIT_NONE or LLAMA_SPLIT_ROW, only the main GPU backend is used
-            if (model->split_mode == LLAMA_SPLIT_ROW || model->split_mode == LLAMA_SPLIT_NONE) {
+            if (model->split_mode == LLAMA_SPLIT_NONE || model->split_mode == LLAMA_SPLIT_ROW) {
                 ggml_backend_t backend = ggml_backend_cuda_init(model->main_gpu);
                 if (backend == nullptr) {
                     LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, model->main_gpu);
+                    llama_free(ctx);
+                    return nullptr;
                 }
                 ctx->backends.push_back(backend);
             } else {
@@ -9211,6 +9213,8 @@ struct llama_context * llama_new_context_with_model(
                     ggml_backend_t backend = ggml_backend_cuda_init(device);
                     if (backend == nullptr) {
                         LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, device);
+                        llama_free(ctx);
+                        return nullptr;
                     }
                     ctx->backends.push_back(backend);
                 }
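
Before this change, llama_new_context_with_model only logged the error when ggml_backend_cuda_init returned nullptr and then carried on, handing the caller a context that was silently missing its GPU backend. With the added llama_free(ctx) and return nullptr in both the single-GPU and per-device paths, the failure now surfaces as a null return value that the caller can check. Below is a minimal caller-side sketch, not part of the commit, assuming the llama.h API as it stood in early 2024 (llama_backend_init still took a NUMA flag then); the model path and n_gpu_layers value are illustrative.

// Hypothetical caller; "model.gguf" and n_gpu_layers = 99 are example values.
#include <cstdio>
#include "llama.h"

int main() {
    llama_backend_init(false); // NUMA off; signature as of early 2024

    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = 99; // offloading layers triggers CUDA backend init

    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == nullptr) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // After this commit, a CUDA backend init failure here yields nullptr
    // instead of a context that is missing its GPU backend.
    llama_context * ctx = llama_new_context_with_model(model, llama_context_default_params());
    if (ctx == nullptr) {
        fprintf(stderr, "failed to create context\n");
        llama_free_model(model);
        llama_backend_free();
        return 1;
    }

    // ... use ctx ...

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}

Freeing the context inside llama_new_context_with_model rather than leaving cleanup to the caller keeps the error contract simple: on a backend initialization failure the function returns nullptr and the partially constructed context does not leak.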