[SYCL] fix error when setting main gpu to non-zero (#5901)

* fix error when setting main gpu to non-zero

* fix delete condition
Neo Zhang Jianyu 2024-03-07 16:34:31 +08:00 committed by GitHub
parent e04e04f8fa
commit ceca1aef07
3 changed files with 119 additions and 76 deletions
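
For context: SYCL enumerates the usable GPUs into a filtered id list, so a device id need not equal its position (index) in that list; before this fix, the user-supplied main_gpu id was used directly as an index, which only happens to work when it is 0. Below is a minimal standalone sketch of that id-vs-index mismatch (the sparse id_list values are illustrative, not taken from the commit):

#include <cstdio>

int main() {
    // Hypothetical filtered GPU list, like the one ggml_sycl_get_gpu_list()
    // fills in the real code: only some devices are kept, so ids are sparse.
    const int id_list[] = {0, 2, 5};   // device ids as enumerated by SYCL
    const int count     = 3;

    const int main_gpu_id = 2;         // the user passes a device *id*
    int main_gpu_index    = -1;        // the loader needs the *index* into id_list
    for (int i = 0; i < count; ++i) {
        if (id_list[i] == main_gpu_id) { main_gpu_index = i; break; }
    }
    // Using the id (2) directly as an index would wrongly select device id 5 here.
    std::printf("id %d -> index %d\n", main_gpu_id, main_gpu_index);
    return 0;
}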

llama.cpp

@@ -3750,6 +3750,14 @@ static bool llm_load_tensors(
     model.main_gpu = main_gpu;
     model.n_gpu_layers = n_gpu_layers;
+#ifdef GGML_USE_SYCL
+    if (split_mode == LLAMA_SPLIT_MODE_NONE) {
+        ggml_backend_sycl_set_single_device(main_gpu);
+        //SYCL use device index (0, 1, 2), instead of device id.
+        main_gpu = ggml_backend_sycl_get_device_index(main_gpu);
+    }
+#endif
     const int64_t n_layer = hparams.n_layer;
     const int64_t i_gpu_start = std::max((int64_t) hparams.n_layer - n_gpu_layers, (int64_t) 0);
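
This added block is the heart of the fix: with LLAMA_SPLIT_MODE_NONE, the loader first pins SYCL to the requested device, then converts main_gpu from the user-facing device id into the internal device index before it is used for layer placement. A hedged sketch of the same pattern in isolation (the declarations mirror how the diff calls these functions and are not checked against ggml-sycl.h; resolve_main_gpu and single_device are illustrative names):

void ggml_backend_sycl_set_single_device(int device_id);
int  ggml_backend_sycl_get_device_index(int device_id);

int resolve_main_gpu(bool single_device, int main_gpu) {
    if (single_device) {
        // pin all work to one device, identified by its SYCL device id ...
        ggml_backend_sycl_set_single_device(main_gpu);
        // ... then translate the id into the index the rest of the loader uses
        main_gpu = ggml_backend_sycl_get_device_index(main_gpu);
    }
    return main_gpu; // now an index, safe to use for layer placement
}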
@@ -12260,13 +12268,13 @@ struct llama_context * llama_new_context_with_model(
             ctx->backends.push_back(backend);
         } else {
             // LLAMA_SPLIT_LAYER requires a backend for each GPU
-            int id_list[GGML_SYCL_MAX_DEVICES];
-            ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
             for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
-                int device_id = id_list[i];
                 ggml_backend_t backend = ggml_backend_sycl_init(i);
                 if (backend == nullptr) {
-                    LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d)backend\n", __func__, device_id, i);
+                    int id_list[GGML_SYCL_MAX_DEVICES];
+                    ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
+                    LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d)backend\n", __func__, id_list[i], i);
                     llama_free(ctx);
                     return nullptr;
                 }
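
Taken together, the second hunk initializes each backend by its device index and defers fetching the id list to the failure path, where it is only needed to translate the failing index back into a device id for the log. A consolidated sketch of the loop as it stands after the change (symbols as used elsewhere in this diff; a space is added before "backend" in the log message for readability):

// post-change shape of the per-GPU loop in llama_new_context_with_model
for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
    // backends are created by device *index*, not device id
    ggml_backend_t backend = ggml_backend_sycl_init(i);
    if (backend == nullptr) {
        // the id list is only needed here, to report the device id
        int id_list[GGML_SYCL_MAX_DEVICES];
        ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
        LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n",
                        __func__, id_list[i], i);
        llama_free(ctx);
        return nullptr;
    }
    ctx->backends.push_back(backend);
}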