From 3704f33389cea4183eb96eb64a8453c93c41c361 Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh <ben.ashbaugh@intel.com>
Date: Wed, 12 Jun 2024 14:41:27 -0700
Subject: [PATCH] sycl: always set the main device after initialization

Because we are using the main device to determine the context
for USM host allocations, we need to ensure it is set to a valid
value after initialization, so set device zero as the initial
main device.

Also, adds a small refactor to the GPU detection logic, to ensure
all GPUs are from the same backend.  Although unlikely due to the
max compute unit check, the prior code would attempt to use GPUs
from different backends together if they happened to have the same
maximum number of compute units.  As an added bonus, the updates
work with GPUs using the OpenCL backend, also.
---
 ggml-sycl.cpp | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index e7d260bd4..483c03071 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -3307,6 +3307,7 @@ class sycl_gpu_mgr {
 
         void detect_sycl_gpu_list_with_max_cu() try {
             int device_count = dpct::dev_mgr::instance().device_count();
+            sycl::backend backend;
 
             for (int id = 0; id < device_count; id++) {
                 sycl::device device = dpct::dev_mgr::instance().get_device(id);
@@ -3314,8 +3315,10 @@ class sycl_gpu_mgr {
                     continue;
                 dpct::device_info prop;
                 dpct::get_device_info(prop, device);
-                if (max_compute_units < prop.get_max_compute_units())
+                if (max_compute_units < prop.get_max_compute_units()) {
                     max_compute_units = prop.get_max_compute_units();
+                    backend = device.get_backend();
+                }
             }
 
             for (int id = 0; id < device_count; id++) {
@@ -3325,7 +3328,7 @@ class sycl_gpu_mgr {
                 dpct::device_info prop;
                 dpct::get_device_info(prop, device);
                 if (max_compute_units == prop.get_max_compute_units() &&
-                    is_ext_oneapi_device(device)) {
+                    backend == device.get_backend()) {
                     gpus.push_back(id);
                     devices.push_back(device);
                     work_group_size = prop.get_max_work_group_size();
@@ -3357,15 +3360,6 @@ class sycl_gpu_mgr {
             }
             GGML_ASSERT(false);
         }
-
-        bool is_ext_oneapi_device(const sycl::device &dev) {
-            sycl::backend dev_backend = dev.get_backend();
-            if (dev_backend == sycl::backend::ext_oneapi_level_zero ||
-                dev_backend == sycl::backend::ext_oneapi_cuda ||
-                dev_backend == sycl::backend::ext_oneapi_hip)
-                return true;
-            return false;
-        }
 };
 
 static sycl_gpu_mgr *g_sycl_gpu_mgr = NULL;
@@ -17400,6 +17394,7 @@ GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id
     g_sycl_gpu_mgr = new sycl_gpu_mgr(main_gpu_id);
     g_ggml_sycl_backend_gpu_mode = SYCL_SINGLE_GPU_MODE;
     ggml_init_by_gpus(g_sycl_gpu_mgr->get_gpu_count());
+    ggml_sycl_set_main_device(0);
     g_ggml_backend_sycl_buffer_type_initialized = false;
 }
 
@@ -17419,6 +17414,7 @@ GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode() {
     g_sycl_gpu_mgr = new sycl_gpu_mgr();
     g_ggml_sycl_backend_gpu_mode = SYCL_MUL_GPU_MODE;
     ggml_init_by_gpus(g_sycl_gpu_mgr->get_gpu_count());
+    ggml_sycl_set_main_device(0);
     g_ggml_backend_sycl_buffer_type_initialized = false;
 }