refactor device in sycl_device, restore ctx in create_queue

2024-07-30 23:49:34 +08:00 · 2024-07-30 23:49:34 +08:00 · d5380f3af2
commit d5380f3af2
parent e66117076c
9 changed files with 495 additions and 481 deletions
--- a/ggml/src/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl.cpp
@ -48,7 +48,7 @@ void   ggml_sycl_get_device_description(int device, char * description, size_t d
 bool   ggml_backend_is_sycl(ggml_backend_t backend);
 int    ggml_backend_sycl_get_device(ggml_backend_t backend);
 static bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer);
-
+static bool ggml_backend_buffer_is_sycl(ggml_backend_buffer_t buffer);

 void dev2dev_memcpy(sycl::queue &q_dst, sycl::queue &q_src, void *ptr_dst,
                    const void *ptr_src, size_t size) {
@ -2279,11 +2279,11 @@ static int64_t get_row_rounding(ggml_type type, const std::array<float, GGML_SYC
    for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
        int id = ggml_backend_sycl_get_device_id(i);
        if (tensor_split[i] < (i + 1 < ggml_sycl_info().device_count ? tensor_split[i + 1] : 1.0f)) {
-            if (min_compute_capability > ggml_sycl_info().devices[id].cc) {
-                min_compute_capability = ggml_sycl_info().devices[id].cc;
+            if (min_compute_capability > ggml_sycl_info().device_infos[id].cc) {
+                min_compute_capability = ggml_sycl_info().device_infos[id].cc;
            }
-            if (max_compute_capability < ggml_sycl_info().devices[id].cc) {
-                max_compute_capability = ggml_sycl_info().devices[id].cc;
+            if (max_compute_capability < ggml_sycl_info().device_infos[id].cc) {
+                max_compute_capability = ggml_sycl_info().device_infos[id].cc;
            }
        }
    }
@ -2680,17 +2680,14 @@ static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) {
    }

 #ifdef NDEBUG
-    for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
-        int id = ggml_backend_sycl_get_device_id(i);
+    for (auto &id: ggml_sycl_info().ids) {
        SYCL_CHECK(ggml_sycl_set_device(id));
    }

-    for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
-        int id = ggml_backend_sycl_get_device_id(i);
+    for (auto &id: ggml_sycl_info().ids) {
        SYCL_CHECK(ggml_sycl_set_device(id));

-        for (int i_other = 0; i_other < ggml_sycl_info().device_count; ++i_other) {
-            int id_other = ggml_backend_sycl_get_device_id(i_other);
+        for (auto &id_other: ggml_sycl_info().ids) {
            if (id == id_other) {
                continue;
            }
@ -2843,7 +2840,6 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
        } else {
            dev[id].src1_ddf = dev[id].src1_ddf_alloc.alloc(ctx.pool(id), ggml_nelements(src1));
        }
-
        if (convert_src1_to_q8_1) {
            dev[id].src1_ddq = dev[id].src1_ddq_alloc.alloc(ctx.pool(id), nrows1*src1_padded_col_size*q8_1_ts/q8_1_bs);

@ -3165,8 +3161,13 @@ static void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * s

 static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
+    // log_tensor_with_cnt(ctx, "log/src0", src0, -1);
+    // log_tensor_with_cnt(ctx, "log/src1", src1, -1);
+    // log_tensor_with_cnt(ctx, "log/dst0", dst, -1);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm);
+    // log_tensor_with_cnt(ctx, "log/dst1", dst, -1);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    // exit(1);
 }

 static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@ -3417,12 +3418,12 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
                continue;
            }

-            if (min_compute_capability > ggml_sycl_info().devices[id].cc) {
-                min_compute_capability = ggml_sycl_info().devices[id].cc;
+            if (min_compute_capability > ggml_sycl_info().device_infos[id].cc) {
+                min_compute_capability = ggml_sycl_info().device_infos[id].cc;
            }
        }
    } else {
-        min_compute_capability    = ggml_sycl_info().devices[ctx.device].cc;
+        min_compute_capability    = ggml_sycl_info().device_infos[ctx.device].cc;
    }

    // check data types and tensor shapes for custom matrix multiplication kernels:
@ -4332,7 +4333,6 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
 ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_id) {
    static std::mutex mutex;
    std::lock_guard<std::mutex> lock(mutex);
-
    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");

    check_allow_device_id(device_id);
@ -4345,7 +4345,9 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_id) {
        for (int i = 0; i < ggml_sycl_info().device_count; i++) {
            int id = ggml_backend_sycl_get_device_id(i);
            auto & device = dpct::dev_mgr::instance().get_device(id);
-            queue_ptr stream = &(device.default_queue());
+            // queue_ptr stream = &(device.default_queue());
+            queue_ptr stream = ggml_sycl_info().device_infos[id].qptrs[0];
+
            ggml_backend_sycl_buffer_types[id] = {
                /* .iface    = */ ggml_backend_sycl_buffer_type_interface,
                /* .context  = */ new ggml_backend_sycl_buffer_type_context{id, GGML_SYCL_NAME + std::to_string(id), stream},
--- a/ggml/src/ggml-sycl/common.cpp
+++ b/ggml/src/ggml-sycl/common.cpp
@ -20,12 +20,16 @@ void* ggml_sycl_host_malloc(size_t size) try {
  if (getenv("GGML_SYCL_NO_PINNED") != nullptr) {
    return nullptr;
  }
-
+//   ggml_sycl_info().device_mgr->first_queue
  void* ptr = nullptr;
  // allow to use dpct::get_in_order_queue() for host malloc
-  dpct::err0 err = CHECK_TRY_ERROR(
-      ptr = (void*)sycl::malloc_host(size, dpct::get_in_order_queue()));
+  auto q = dpct::get_in_order_queue();
+//   sycl::queue q = *ggml_sycl_info().device_mgr->qptrs[0][0];

+  dpct::err0 err = CHECK_TRY_ERROR(
+      ptr = (void*)sycl::malloc_host(size, q));
+
+//  printf("zjy ggml_sycl_host_malloc ptr=%p queue=%p size=%lu \n", ptr,q, size);
  if (err != 0) {
    // clear the error
    fprintf(
@ -66,27 +70,6 @@ static inline int get_sycl_env(const char *env_name, int default_val) {
    return user_number;
 }

-static inline bool env_existed(const char *env_name) {
-     char *user_device_string = getenv(env_name);
-     return user_device_string!=NULL;
-}
-
-static std::vector<int> get_sycl_visible_devices() {
-    static std::vector<int> device_ids;
-    char *devices_env = getenv("GGML_SYCL_VISIBLE_DEVICES");
-    if (devices_env != nullptr) {
-        std::string devices(devices_env);
-        std::replace(devices.begin(), devices.end(), ',', ' ');
-
-        std::stringstream ss(devices);
-        int tmp;
-        while (ss >> tmp) {
-            device_ids.push_back(tmp);
-        }
-    }
-    return device_ids;
-}
-
 void print_device_detail_part1(int id, sycl::device &device, std::string device_type) {

    dpct::device_info prop;
@ -193,8 +176,7 @@ static ggml_sycl_device_info ggml_sycl_init() try {
        initialized = true;
    }

-    static ggml_sycl_device_info info = {};
-    info.refresh_device();
+    static ggml_sycl_device_info info;

    if (info.device_count == 0) {
        fprintf(stderr, "%s: failed to initialize " GGML_SYCL_NAME ": no available device found\n",
@ -215,288 +197,4 @@ ggml_sycl_device_info &ggml_sycl_info() {
    return info;
 }

-//--sycl_device_mgr--
-
-sycl_device_mgr::sycl_device_mgr(
-    ggml_sycl_backend_device_filter device_filter) {
-    switch (device_filter) {
-    case SYCL_DEVICES_TOP_LEVEL_ZERO:
-        detect_sycl_gpu_list_with_max_cu();
-        create_context_for_group_gpus();
-        break;
-    case SYCL_ALL_DEVICES:
-        detect_all_sycl_device_list();
-        create_context_for_devices();
-        break;
-    case SYCL_VISIBLE_DEVICES:
-        detect_sycl_visible_device_list();
-        create_context_for_devices();
-        break;
-    default:
-        std::cerr << "sycl_device_mgr: Invalid device_filter " << device_filter
-                  << std::endl;
-    }
-    init_allow_devices();
-}
-
-/*
-Bind all gpus in same host with same context, for better performance in
-device-to-device copy in the future.
-*/
-void sycl_device_mgr::create_context_for_group_gpus() {
-    sycl::context ctx = sycl::context(devices);
-    assert(device_ids.size() > 0);
-    first_queue = _create_queue_ptr(devices[0]);
-    sycl::context ctx0 = first_queue->get_context();
-    for (int i = 0; i < device_ids.size(); i++) {
-        ctxs.push_back(ctx0);
-    }
-}
-
-sycl::queue *sycl_device_mgr::_create_queue_ptr(sycl::device device) {
-    auto q = dpct::get_current_device().create_queue(device);
-    return q;
-    // _queues.push_back(q);
-    // return & _queues.back();
-}
-
-sycl::queue *sycl_device_mgr::create_queue_for_device(sycl::device &device) {
-    dpct::select_device(dpct::dev_mgr::instance().get_device_id(device));
-    auto qptr = _create_queue_ptr(device);
-    return qptr;
-}
-
-sycl::queue *sycl_device_mgr::create_queue_for_device_id(int device_id) {
-    int i = get_device_index(device_id);
-    sycl::device device = dpct::dev_mgr::instance().get_device(device_id);
-    return create_queue_for_device(device);
-}
-
-int sycl_device_mgr::get_device_index(int device_id) {
-    for (int i = 0; i < device_ids.size(); i++) {
-        if (device_ids[i] == device_id)
-            return i;
-    }
-    return -1;
-}
-
-void sycl_device_mgr::create_context_for_devices() {
-    for (int i = 0; i < device_ids.size(); i++) {
-        sycl::context ctx = sycl::context(devices[i]);
-        ctxs.push_back(ctx);
-    }
-}
-
-void sycl_device_mgr::init_allow_devices() {
-    device_list = "";
-    for (size_t i = 0; i < device_ids.size(); ++i) {
-        device_list += std::to_string(device_ids[i]);
-        device_list += ",";
-    }
-    if (device_list.length() > 1) {
-        device_list.pop_back();
-    }
-}
-
-bool sycl_device_mgr::is_allowed_device(int device_id) {
-    return std::find(device_ids.begin(), device_ids.end(), device_id) !=
-           device_ids.end();
-}
-
-void sycl_device_mgr::detect_all_sycl_device_list() try {
-    int device_count = dpct::dev_mgr::instance().device_count();
-
-    for (int id = 0; id < device_count; id++) {
-        add_device_info(id);
-    }
-    return;
-} catch (sycl::exception const &exc) {
-    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
-              << ", line:" << __LINE__ << std::endl;
-    std::exit(1);
-}
-
-void sycl_device_mgr::detect_sycl_visible_device_list() try {
-    std::vector<int> sycl_devices = get_sycl_visible_devices();
-    int device_count = dpct::dev_mgr::instance().device_count();
-
-    for (int i = 0; i < sycl_devices.size(); i++) {
-        int id = sycl_devices[i];
-        if (id >= device_count) {
-            std::cerr << __func__ << ": invalid device_id:" << id
-                      << " from GGML_SYCL_VISIBLE_DEVICES="
-                      << getenv("GGML_SYCL_VISIBLE_DEVICES")
-                      << ", available IDs: ";
-            if (device_count > 1) {
-                std::cerr << "[0, " << device_count - 1 << "]";
-            } else if (device_count == 1) {
-                std::cerr << "[0]";
-            } else {
-                std::cerr << "[]";
-            }
-            std::cerr << std::endl;
-        }
-        add_device_info(id);
-    }
-    return;
-} catch (sycl::exception const &exc) {
-    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
-              << ", line:" << __LINE__ << std::endl;
-    std::exit(1);
-}
-
-/*
-Use all GPUs with same top max compute units
-*/
-void sycl_device_mgr::detect_sycl_gpu_list_with_max_cu() try {
-    int device_count = dpct::dev_mgr::instance().device_count();
-    int local_max_compute_units = 0;
-    for (int id = 0; id < device_count; id++) {
-        sycl::device device = dpct::dev_mgr::instance().get_device(id);
-        if (!device.is_gpu())
-            continue;
-        dpct::device_info prop;
-        dpct::get_device_info(prop, device);
-        if (local_max_compute_units < prop.get_max_compute_units())
-            local_max_compute_units = prop.get_max_compute_units();
-    }
-
-    for (int id = 0; id < device_count; id++) {
-        sycl::device device = dpct::dev_mgr::instance().get_device(id);
-        if (!device.is_gpu())
-            continue;
-        dpct::device_info prop;
-        dpct::get_device_info(prop, device);
-        if (local_max_compute_units == prop.get_max_compute_units() &&
-            is_ext_oneapi_device(device)) {
-            add_device_info(id);
-        }
-    }
-    return;
-} catch (sycl::exception const &exc) {
-    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
-              << ", line:" << __LINE__ << std::endl;
-    std::exit(1);
-}
-
-int sycl_device_mgr::get_device_count() { return (int)device_ids.size(); }
-
-bool sycl_device_mgr::is_ext_oneapi_device(const sycl::device &dev) {
-    sycl::backend dev_backend = dev.get_backend();
-    if (dev_backend == sycl::backend::ext_oneapi_level_zero ||
-        dev_backend == sycl::backend::ext_oneapi_cuda ||
-        dev_backend == sycl::backend::ext_oneapi_hip)
-        return true;
-    return false;
-}
-
-void sycl_device_mgr::add_device_info(int id) {
-    sycl::device device = dpct::dev_mgr::instance().get_device(id);
-    device_ids.push_back(id);
-    devices.push_back(device);
-    dpct::device_info prop;
-    dpct::get_device_info(prop, device);
-    work_group_sizes.push_back(prop.get_max_work_group_size());
-    max_compute_units.push_back(prop.get_max_compute_units());
-    hw_familys.push_back(get_device_family(&device));
-}
-
-//--sycl_device_mgr--
-
-//--ggml_sycl_device_info--
-void ggml_sycl_device_info::print_gpu_device_list() {
-    GGML_ASSERT(device_mgr);
-
-    char *hint = NULL;
-    if (oneapi_device_selector_existed && sycl_visible_devices_existed) {
-        hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s and "
-               "GGML_SYCL_VISIBLE_DEVICES=%s\n";
-        fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
-                getenv("ONEAPI_DEVICE_SELECTOR"),
-                getenv("GGML_SYCL_VISIBLE_DEVICES"));
-    } else if (oneapi_device_selector_existed) {
-        hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s\n";
-        fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
-                getenv("ONEAPI_DEVICE_SELECTOR"));
-    } else if (sycl_visible_devices_existed) {
-        hint = "detect %d SYCL devices:[%s] by GGML_SYCL_VISIBLE_DEVICES=%s\n";
-        fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
-                getenv("GGML_SYCL_VISIBLE_DEVICES"));
-    } else {
-        hint = "detect %d SYCL level-zero GPUs:[%s] with top Max compute "
-               "units:%d, to use any SYCL devices, set/export "
-               "GGML_SYCL_VISIBLE_DEVICES or ONEAPI_DEVICE_SELECTOR\n";
-        fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
-                device_mgr->max_compute_units[0]);
-    }
-}
-
-int ggml_sycl_device_info::work_group_size(int device_id) {
-    GGML_ASSERT(device_mgr);
-    return device_mgr->work_group_sizes[device_id];
-}
-
-void ggml_sycl_device_info::refresh_device() {
-    oneapi_device_selector_existed = env_existed("ONEAPI_DEVICE_SELECTOR");
-    sycl_visible_devices_existed = env_existed("GGML_SYCL_VISIBLE_DEVICES");
-    if (!device_mgr)
-        delete device_mgr;
-
-    if (sycl_visible_devices_existed) {
-        device_mgr = new sycl_device_mgr(SYCL_VISIBLE_DEVICES);
-    } else if (oneapi_device_selector_existed) {
-        device_mgr = new sycl_device_mgr(SYCL_ALL_DEVICES);
-    } else {
-        device_mgr = new sycl_device_mgr(SYCL_DEVICES_TOP_LEVEL_ZERO);
-    }
-
-    device_count = device_mgr->get_device_count();
-
-    int64_t total_vram = 0;
-
-    for (int i = 0; i < device_count; ++i) {
-        int id = get_device_id(i);
-        devices[id].vmm = 0;
-        dpct::device_info prop;
-        SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
-            prop, dpct::dev_mgr::instance().get_device(id))));
-
-        default_tensor_split[i] =
-            total_vram; // continue data, so use device index
-        total_vram += prop.get_global_mem_size();
-
-        devices[id].cc =
-            100 * prop.get_major_version() + 10 * prop.get_minor_version();
-    }
-
-    for (int i = 0; i < device_count; ++i) {
-        default_tensor_split[i] /=
-            total_vram; // continue data, so use device index
-    }
-
-    print_gpu_device_list();
-}
-
-bool ggml_sycl_device_info::is_allowed_device(int device_id) {
-    return device_mgr->is_allowed_device(device_id);
-}
-
-const char *ggml_sycl_device_info::devices_list() {
-    return device_mgr->device_list.c_str();
-}
-
-int ggml_sycl_device_info::get_device_id(int device_index) {
-    if (device_index < device_mgr->device_ids.size()) {
-        return device_mgr->device_ids.at(device_index);
-    } else {
-        std::cerr << __func__ << ":SYCL device:" << device_index
-                  << " is out of range:[" << devices_list() << "]" << std::endl;
-        std::exit(1);
-    }
-}
-
-int ggml_sycl_device_info::hw_family(int device_id) {
-    return device_mgr->hw_familys[device_id];
-}
-
 //--ggml_sycl_device_info--
--- a/ggml/src/ggml-sycl/common.hpp
+++ b/ggml/src/ggml-sycl/common.hpp
@ -21,12 +21,13 @@
 #include "ggml-sycl.h"
 #include "presets.hpp"
 #include "sycl_hw.hpp"
+#include "sycl_device.hpp"

 #define GGML_COMMON_DECL_SYCL
 #define GGML_COMMON_IMPL_SYCL
 #include "ggml-common.h"

-void* ggml_sycl_host_malloc(size_t size);
+
 void ggml_sycl_host_free(void* ptr);

 static int g_ggml_sycl_debug = 0;
@ -86,12 +87,6 @@ enum ggml_sycl_backend_gpu_mode {
  SYCL_MUL_GPU_MODE
 };

-enum ggml_sycl_backend_device_filter {
-  SYCL_ALL_DEVICES = 0,
-  SYCL_DEVICES_TOP_LEVEL_ZERO,
-  SYCL_VISIBLE_DEVICES
-};
-
 static_assert(sizeof(sycl::half) == sizeof(ggml_fp16_t), "wrong fp16 size");

 static void crash() {
@ -169,10 +164,10 @@ inline dpct::err0 ggml_sycl_set_device(const int device_id) try {
  int current_device_id;
  SYCL_CHECK(CHECK_TRY_ERROR(current_device_id = get_current_device_id()));

-  GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d, current_device_id=%d\n", device_id, current_device_id);
  if (device_id == current_device_id) {
    return 0;
  }
+  GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d, current_device_id=%d\n", device_id, current_device_id);

  return CHECK_TRY_ERROR(dpct::select_device(device_id));

@ -183,67 +178,6 @@ inline dpct::err0 ggml_sycl_set_device(const int device_id) try {
  std::exit(1);
 }

-
-class sycl_device_mgr {
-  public:
-    std::vector<int> device_ids;
-    std::vector<sycl::device> devices;
-    std::vector<int> max_compute_units;
-    std::vector<int> work_group_sizes;
-    std::vector<int> hw_familys;
-
-    sycl::queue *first_queue;
-    std::vector<sycl::queue> _queues;
-    std::vector<sycl::context> ctxs;
-    std::string device_list = "";
-
-    sycl_device_mgr(ggml_sycl_backend_device_filter device_filter);
-
-    sycl::queue *_create_queue_ptr(sycl::device device); //internal API to hide dpct API.
-    void create_context_for_group_gpus();
-    sycl::queue *create_queue_for_device(sycl::device &device);
-    sycl::queue *create_queue_for_device_id(int device_id);
-    int get_device_index(int device_id);
-    void create_context_for_devices();
-    void init_allow_devices();
-    bool is_allowed_device(int device_id);
-    void detect_all_sycl_device_list();
-    void detect_sycl_visible_device_list();
-    void detect_sycl_gpu_list_with_max_cu();
-    int get_device_count();
-    bool is_ext_oneapi_device(const sycl::device &dev);
-    void add_device_info(int id);
-};
-
-
-struct ggml_sycl_device_info {
-    int device_count;
-    bool oneapi_device_selector_existed = false;
-    bool sycl_visible_devices_existed = false;
-
-    struct sycl_device_info {
-        int     cc;                 // compute capability
-        // int     nsm;                // number of streaming multiprocessors
-        // size_t  smpb;               // max. shared memory per block
-        bool    vmm;                // virtual memory support
-        size_t  total_vram;
-    };
-
-    sycl_device_info devices[GGML_SYCL_MAX_DEVICES] = {};
-
-    std::array<float, GGML_SYCL_MAX_DEVICES> default_tensor_split = {};
-
-    sycl_device_mgr *device_mgr = NULL;
-
-    void print_gpu_device_list();
-    int work_group_size(int device_id);
-    void refresh_device();
-    bool is_allowed_device(int device_id);
-    const char* devices_list();
-    int get_device_id(int device_index);
-    int hw_family(int device_id);
-};
-
 struct ggml_sycl_pool {
    virtual ~ggml_sycl_pool() = default;

@ -309,17 +243,17 @@ struct ggml_backend_sycl_context {

    queue_ptr qptrs[GGML_SYCL_MAX_DEVICES][GGML_SYCL_MAX_STREAMS] = { { nullptr } };

-    explicit ggml_backend_sycl_context(struct ggml_sycl_device_info &sycl_device_info, int device_id) :
-        device(device_id),
+    explicit ggml_backend_sycl_context(struct ggml_sycl_device_info &sycl_device_info, int id) :
+        device(id),
        name(GGML_SYCL_NAME + std::to_string(device)) {
            for (int i=0;i<GGML_SYCL_MAX_STREAMS; i++){
-                qptrs[device_id][i] = sycl_device_info.device_mgr->create_queue_for_device_id(device_id);
+                qptrs[id][i] = sycl_device_info.device_infos[id].qptrs[i];
            }
    }

-    queue_ptr stream(int device, int stream) {
-        assert(qptrs[device][stream] != nullptr);
-        return qptrs[device][stream];
+    queue_ptr stream(int id, int stream) {
+        assert(qptrs[id][stream] != nullptr);
+        return qptrs[id][stream];
    }

    queue_ptr stream() {
@ -349,10 +283,10 @@ static inline void exit_with_stack_print() {


 static inline int get_sycl_env(const char *env_name, int default_val);
-static inline bool env_existed(const char *env_name);
+
 void* ggml_sycl_host_malloc(size_t size);
 void ggml_sycl_host_free(void* ptr);
-static std::vector<int> get_sycl_visible_devices();
+
 void ggml_backend_sycl_print_sycl_devices();
 static ggml_sycl_device_info ggml_sycl_init();
 ggml_sycl_device_info &ggml_sycl_info();
--- a/ggml/src/ggml-sycl/dpct/helper.hpp
+++ b/ggml/src/ggml-sycl/dpct/helper.hpp
@ -592,16 +592,18 @@ namespace dpct
    class device_ext : public sycl::device {
      typedef std::mutex mutex_type;

-     public:
-      device_ext() : sycl::device() {}
-      ~device_ext() {
-        std::lock_guard<mutex_type> lock(m_mutex);
-        clear_queues();
-      }
-      device_ext(const sycl::device &base) : sycl::device(base) {
-        std::lock_guard<mutex_type> lock(m_mutex);
-        init_queues();
-      }
+    public:
+        device_ext() : sycl::device(), _ctx(*this) {}
+        ~device_ext()
+        {
+            std::lock_guard<mutex_type> lock(m_mutex);
+            clear_queues();
+        }
+        device_ext(const sycl::device &base) : sycl::device(base), _ctx(*this)
+        {
+            std::lock_guard<mutex_type> lock(m_mutex);
+            init_queues();
+        }

      int is_native_atomic_supported() { return 0; }
      int get_major_version() const { return dpct::get_major_version(*this); }
@ -711,10 +713,10 @@ namespace dpct
        return create_in_order_queue(enable_exception_handler);
      }

-      sycl::queue *create_queue(sycl::device device,
-                               bool enable_exception_handler = false) {
-        return create_in_order_queue(device, enable_exception_handler);
-      }
+        sycl::queue *create_queue(sycl::context context, sycl::device device,
+                                bool enable_exception_handler = false) {
+            return create_in_order_queue(context, device, enable_exception_handler);
+        }

      sycl::queue *create_in_order_queue(bool enable_exception_handler = false) {
        std::lock_guard<mutex_type> lock(m_mutex);
@ -722,12 +724,12 @@ namespace dpct
                                 sycl::property::queue::in_order());
      }

-      sycl::queue *create_in_order_queue(sycl::device device,
+        sycl::queue *create_in_order_queue(sycl::context context, sycl::device device,
                                        bool enable_exception_handler = false) {
-        std::lock_guard<mutex_type> lock(m_mutex);
-        return create_queue_impl(device, enable_exception_handler,
-                                 sycl::property::queue::in_order());
-      }
+            std::lock_guard<mutex_type> lock(m_mutex);
+            return create_queue_impl(context, device, enable_exception_handler,
+                                    sycl::property::queue::in_order());
+        }

      sycl::queue *create_out_of_order_queue(
          bool enable_exception_handler = false) {
@ -735,24 +737,28 @@ namespace dpct
        return create_queue_impl(enable_exception_handler);
      }

-      void destroy_queue(sycl::queue *&queue) {
-        std::lock_guard<mutex_type> lock(m_mutex);
-        _queues.erase(std::remove_if(_queues.begin(), _queues.end(),
-                                    [=](const std::shared_ptr<sycl::queue> &q) -> bool
-                                    {
-                                        return q.get() == queue;
-                                    }),
-                    _queues.end());
-        queue = nullptr;
-      }
-      void set_saved_queue(sycl::queue *q) {
-        std::lock_guard<mutex_type> lock(m_mutex);
-        _saved_queue = q;
-      }
-      sycl::queue *get_saved_queue() const {
-        std::lock_guard<mutex_type> lock(m_mutex);
-        return _saved_queue;
-      }
+        void destroy_queue(sycl::queue *&queue)
+        {
+            std::lock_guard<mutex_type> lock(m_mutex);
+            _queues.erase(std::remove_if(_queues.begin(), _queues.end(),
+                                         [=](const std::shared_ptr<sycl::queue> &q) -> bool
+                                         {
+                                             return q.get() == queue;
+                                         }),
+                          _queues.end());
+            queue = nullptr;
+        }
+        void set_saved_queue(sycl::queue *q)
+        {
+            std::lock_guard<mutex_type> lock(m_mutex);
+            _saved_queue = q;
+        }
+        sycl::queue *get_saved_queue() const
+        {
+            std::lock_guard<mutex_type> lock(m_mutex);
+            return _saved_queue;
+        }
+        sycl::context get_context() const { return _ctx; }

     private:
      void clear_queues() {
@ -767,18 +773,19 @@ namespace dpct
        _saved_queue = &default_queue();
      }

-      /// Caller should acquire resource \p m_mutex before calling this
-      /// function.
-      template <class... Properties>
-      sycl::queue *create_queue_impl(bool enable_exception_handler,
-                                    Properties... properties) {
-        sycl::async_handler eh = {};
-        if (enable_exception_handler) {
-          eh = exception_handler;
-        }
-        _queues.push_back(std::make_shared<sycl::queue>(
-            *this, eh,
-            sycl::property_list(
+        /// Caller should acquire resource \p m_mutex before calling this function.
+        template <class... Properties>
+        sycl::queue *create_queue_impl(bool enable_exception_handler,
+                                       Properties... properties)
+        {
+            sycl::async_handler eh = {};
+            if (enable_exception_handler)
+            {
+                eh = exception_handler;
+            }
+            _queues.push_back(std::make_shared<sycl::queue>(
+                _ctx, *this, eh,
+                sycl::property_list(
 #ifdef DPCT_PROFILING_ENABLED
                sycl::property::queue::enable_profiling(),
 #endif
@ -787,21 +794,21 @@ namespace dpct
        return _queues.back().get();
      }

-      template <class... Properties>
-      sycl::queue *create_queue_impl(sycl::device device,
+        template <class... Properties>
+        sycl::queue *create_queue_impl(sycl::context context, sycl::device device,
                                    bool enable_exception_handler,
                                    Properties... properties) {
-        sycl::async_handler eh = {};
-        if (enable_exception_handler) {
-          eh = exception_handler;
-        }
-        _queues.push_back(std::make_shared<sycl::queue>(
-            device, eh,
-                        sycl::property_list(
-#ifdef DPCT_PROFILING_ENABLED
-                            sycl::property::queue::enable_profiling(),
-#endif
-                            properties...)));
+            sycl::async_handler eh = {};
+            if (enable_exception_handler) {
+                eh = exception_handler;
+            }
+            _queues.push_back(std::make_shared<sycl::queue>(
+                context, device, eh,
+                sycl::property_list(
+        #ifdef DPCT_PROFILING_ENABLED
+                    sycl::property::queue::enable_profiling(),
+        #endif
+                    properties...)));

        return _queues.back().get();
      }
@ -811,6 +818,7 @@ namespace dpct
      }
      sycl::queue *_q_in_order, *_q_out_of_order;
      sycl::queue *_saved_queue;
+      sycl::context _ctx;
      std::vector<std::shared_ptr<sycl::queue>> _queues;
      mutable mutex_type m_mutex;
    };
--- a/ggml/src/ggml-sycl/mmq.cpp
+++ b/ggml/src/ggml-sycl/mmq.cpp
@ -1779,7 +1779,7 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
    int id;
    SYCL_CHECK(
        CHECK_TRY_ERROR(id = get_current_device_id()));
-    const int compute_capability = ggml_sycl_info().devices[id].cc;
+    const int compute_capability = ggml_sycl_info().device_infos[id].cc;

    int mmq_x, mmq_y, nwarps;
    if (compute_capability >= VER_GEN13) {
@ -1894,7 +1894,7 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
    int id;
    SYCL_CHECK(
        CHECK_TRY_ERROR(id = get_current_device_id()));
-    const int compute_capability = ggml_sycl_info().devices[id].cc;
+    const int compute_capability = ggml_sycl_info().device_infos[id].cc;

    int mmq_x, mmq_y, nwarps;
    if (compute_capability >= VER_GEN13) {
@ -2009,7 +2009,7 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
    int id;
    SYCL_CHECK(
        CHECK_TRY_ERROR(id = get_current_device_id()));
-    const int compute_capability = ggml_sycl_info().devices[id].cc;
+    const int compute_capability = ggml_sycl_info().device_infos[id].cc;

    int mmq_x, mmq_y, nwarps;
    if (compute_capability >= VER_GEN13) {
@ -2124,7 +2124,7 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
    int id;
    SYCL_CHECK(
        CHECK_TRY_ERROR(id = get_current_device_id()));
-    const int compute_capability = ggml_sycl_info().devices[id].cc;
+    const int compute_capability = ggml_sycl_info().device_infos[id].cc;

    int mmq_x, mmq_y, nwarps;
    if (compute_capability >= VER_GEN13) {
@ -2239,7 +2239,7 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
    int id;
    SYCL_CHECK(
        CHECK_TRY_ERROR(id = get_current_device_id()));
-    const int compute_capability = ggml_sycl_info().devices[id].cc;
+    const int compute_capability = ggml_sycl_info().device_infos[id].cc;

    int mmq_x, mmq_y, nwarps;
    if (compute_capability >= VER_GEN13) {
@ -2354,7 +2354,7 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
    int id;
    SYCL_CHECK(
        CHECK_TRY_ERROR(id = get_current_device_id()));
-    const int compute_capability = ggml_sycl_info().devices[id].cc;
+    const int compute_capability = ggml_sycl_info().device_infos[id].cc;

    int mmq_x, mmq_y, nwarps;
    if (compute_capability >= VER_GEN13) {
@ -2477,7 +2477,7 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
    int id;
    SYCL_CHECK(
        CHECK_TRY_ERROR(id = get_current_device_id()));
-    const int compute_capability = ggml_sycl_info().devices[id].cc;
+    const int compute_capability = ggml_sycl_info().device_infos[id].cc;

    int mmq_x, mmq_y, nwarps;
    if (compute_capability >= VER_GEN13) {
@ -2605,7 +2605,7 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
    int id;
    SYCL_CHECK(
        CHECK_TRY_ERROR(id = get_current_device_id()));
-    const int compute_capability = ggml_sycl_info().devices[id].cc;
+    const int compute_capability = ggml_sycl_info().device_infos[id].cc;

    int mmq_x, mmq_y, nwarps;
    if (compute_capability >= VER_GEN13) {
@ -2726,7 +2726,7 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
    int id;
    SYCL_CHECK(
        CHECK_TRY_ERROR(id = get_current_device_id()));
-    const int compute_capability = ggml_sycl_info().devices[id].cc;
+    const int compute_capability = ggml_sycl_info().device_infos[id].cc;

    int mmq_x, mmq_y, nwarps;
    if (compute_capability >= VER_GEN13) {
@ -2847,7 +2847,7 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
    int id;
    SYCL_CHECK(
        CHECK_TRY_ERROR(id = get_current_device_id()));
-    const int compute_capability = ggml_sycl_info().devices[id].cc;
+    const int compute_capability = ggml_sycl_info().device_infos[id].cc;

    int mmq_x, mmq_y, nwarps;
    if (compute_capability >= VER_GEN13) {
--- a/ggml/src/ggml-sycl/sycl_device.cpp
+++ b/ggml/src/ggml-sycl/sycl_device.cpp
@ -0,0 +1,286 @@
+#include "sycl_device.hpp"
+#include "sycl_hw.hpp"
+
+
+void ggml_sycl_device_info::init(
+    ggml_sycl_backend_device_filter device_filter) {
+    switch (device_filter) {
+    case SYCL_DEVICES_TOP_LEVEL_ZERO:
+        detect_sycl_gpu_list_with_max_cu();
+        create_context_for_devices();
+        break;
+    case SYCL_ALL_DEVICES:
+        detect_all_sycl_device_list();
+        create_context_for_devices();
+        break;
+    case SYCL_VISIBLE_DEVICES:
+        detect_sycl_visible_device_list();
+        create_context_for_devices();
+        break;
+    default:
+        std::cerr << "ggml_sycl_device_info: Invalid device_filter " << device_filter
+                  << std::endl;
+    }
+    init_allow_devices();
+    device_count = ids.size();
+}
+
+/*
+Bind all devices in same host with same context, for better performance in
+device-to-device copy in the future.
+*/
+void ggml_sycl_device_info::create_context_for_devices() {
+    assert(devices.size() > 0);
+    sycl::context ctx = sycl::context(devices);
+    first_queue = dpct::get_current_device().create_queue(ctx, devices[0]);
+    co_ctx = first_queue->get_context();
+}
+
+sycl::queue *ggml_sycl_device_info::_create_queue_ptr(sycl::device device) {
+    auto q = dpct::get_current_device().create_queue(co_ctx, device);
+    return q;
+}
+
+sycl::queue *ggml_sycl_device_info::create_queue_for_device(sycl::device &device) {
+    dpct::select_device(dpct::dev_mgr::instance().get_device_id(device));
+    auto qptr = _create_queue_ptr(device);
+    return qptr;
+}
+
+sycl::queue *ggml_sycl_device_info::create_queue_for_device_id(int id) {
+    sycl::device device = dpct::dev_mgr::instance().get_device(id);
+    return create_queue_for_device(device);
+}
+
+int ggml_sycl_device_info::get_device_index(int id) {
+    for (int i = 0; i < ids.size(); i++) {
+        if (ids[i] == id)
+            return i;
+    }
+    return -1;
+}
+
+void ggml_sycl_device_info::init_allow_devices() {
+    device_list = "";
+    for (auto & id: ids) {
+        device_list += std::to_string(id);
+        device_list += ",";
+    }
+    if (device_list.length() > 1) {
+        device_list.pop_back();
+    }
+}
+
+bool ggml_sycl_device_info::is_allowed_device(int id) {
+    return std::find(ids.begin(), ids.end(), id) != ids.end();
+}
+
+void ggml_sycl_device_info::detect_all_sycl_device_list() try {
+    int all_device_count = dpct::dev_mgr::instance().device_count();
+
+    for (int id = 0; id < all_device_count; id++) {
+        add_device_info(id);
+    }
+    return;
+} catch (sycl::exception const &exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
+              << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
+}
+
+std::vector<int> ggml_sycl_device_info::get_sycl_visible_devices() {
+    static std::vector<int> device_ids;
+    char *devices_env = getenv("GGML_SYCL_VISIBLE_DEVICES");
+    if (devices_env != nullptr) {
+        std::string devices(devices_env);
+        std::replace(devices.begin(), devices.end(), ',', ' ');
+
+        std::stringstream ss(devices);
+        int tmp;
+        while (ss >> tmp) {
+            device_ids.push_back(tmp);
+        }
+    }
+    return device_ids;
+}
+
+void ggml_sycl_device_info::detect_sycl_visible_device_list() try {
+    std::vector<int> sycl_devices = get_sycl_visible_devices();
+    int all_device_count = dpct::dev_mgr::instance().device_count();
+
+    for (auto & id: sycl_devices) {
+        if (id >= all_device_count) {
+            std::cerr << __func__ << ": invalid device_id:" << id
+                      << " from GGML_SYCL_VISIBLE_DEVICES="
+                      << getenv("GGML_SYCL_VISIBLE_DEVICES")
+                      << ", available IDs: ";
+            if (all_device_count > 1) {
+                std::cerr << "[0, " << all_device_count - 1 << "]";
+            } else if (all_device_count == 1) {
+                std::cerr << "[0]";
+            } else {
+                std::cerr << "[]";
+            }
+            std::cerr << std::endl;
+        }
+        add_device_info(id);
+    }
+    return;
+} catch (sycl::exception const &exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
+              << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
+}
+
+/*
+Use all GPUs with same top max compute units
+*/
+void ggml_sycl_device_info::detect_sycl_gpu_list_with_max_cu() try {
+    int all_device_count = dpct::dev_mgr::instance().device_count();
+    int local_max_compute_units = 0;
+    for (int id = 0; id < all_device_count; id++) {
+        sycl::device device = dpct::dev_mgr::instance().get_device(id);
+        if (!device.is_gpu())
+            continue;
+        dpct::device_info prop;
+        dpct::get_device_info(prop, device);
+        if (local_max_compute_units < prop.get_max_compute_units())
+            local_max_compute_units = prop.get_max_compute_units();
+    }
+
+    for (int id = 0; id < all_device_count; id++) {
+        sycl::device device = dpct::dev_mgr::instance().get_device(id);
+        if (!device.is_gpu())
+            continue;
+        dpct::device_info prop;
+        dpct::get_device_info(prop, device);
+        if (local_max_compute_units == prop.get_max_compute_units() &&
+            is_ext_oneapi_device(device)) {
+            add_device_info(id);
+        }
+    }
+    return;
+} catch (sycl::exception const &exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
+              << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
+}
+
+int ggml_sycl_device_info::get_device_count() { return device_count; }
+
+bool ggml_sycl_device_info::is_ext_oneapi_device(const sycl::device &dev) {
+    sycl::backend dev_backend = dev.get_backend();
+    if (dev_backend == sycl::backend::ext_oneapi_level_zero ||
+        dev_backend == sycl::backend::ext_oneapi_cuda ||
+        dev_backend == sycl::backend::ext_oneapi_hip)
+        return true;
+    return false;
+}
+
+void ggml_sycl_device_info::add_device_info(int id) {
+    sycl::device device = dpct::dev_mgr::instance().get_device(id);
+    dpct::device_info prop;
+    dpct::get_device_info(prop, device);
+
+    ids.push_back(id);
+    devices.push_back(device);
+
+    device_infos[id].id = id;
+    device_infos[id].device = device;
+    device_infos[id].max_work_group_sizes = prop.get_max_work_group_size();
+    device_infos[id].max_compute_units = prop.get_max_compute_units();
+    device_infos[id].hw_family = get_device_family(&device);
+    for (int i=0; i<GGML_SYCL_MAX_STREAMS;i++) {
+        device_infos[id].qptrs[i] = create_queue_for_device_id(id);
+    }
+}
+
+void ggml_sycl_device_info::print_gpu_device_list() {
+   char *hint = NULL;
+    if (oneapi_device_selector_existed && sycl_visible_devices_existed) {
+        hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s and "
+               "GGML_SYCL_VISIBLE_DEVICES=%s\n";
+        fprintf(stderr, hint, get_device_count(), devices_list(),
+                getenv("ONEAPI_DEVICE_SELECTOR"),
+                getenv("GGML_SYCL_VISIBLE_DEVICES"));
+    } else if (oneapi_device_selector_existed) {
+        hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s\n";
+        fprintf(stderr, hint, get_device_count(), devices_list(),
+                getenv("ONEAPI_DEVICE_SELECTOR"));
+    } else if (sycl_visible_devices_existed) {
+        hint = "detect %d SYCL devices:[%s] by GGML_SYCL_VISIBLE_DEVICES=%s\n";
+        fprintf(stderr, hint, get_device_count(), devices_list(),
+                getenv("GGML_SYCL_VISIBLE_DEVICES"));
+    } else {
+        hint = "detect %d SYCL level-zero GPUs:[%s] with top Max compute "
+               "units:%d, to use any SYCL devices, set/export "
+               "GGML_SYCL_VISIBLE_DEVICES or ONEAPI_DEVICE_SELECTOR\n";
+        fprintf(stderr, hint, get_device_count(), devices_list(),
+                device_infos[0].max_compute_units);
+    }
+}
+
+int ggml_sycl_device_info::work_group_size(int id) {
+    GGML_ASSERT(is_allowed_device(id));
+    return device_infos[id].max_work_group_sizes;
+}
+
+ggml_sycl_device_info::ggml_sycl_device_info() {
+    oneapi_device_selector_existed = env_existed("ONEAPI_DEVICE_SELECTOR");
+    sycl_visible_devices_existed = env_existed("GGML_SYCL_VISIBLE_DEVICES");
+
+    if (sycl_visible_devices_existed) {
+        init(SYCL_VISIBLE_DEVICES);
+    } else if (oneapi_device_selector_existed) {
+        init(SYCL_ALL_DEVICES);
+    } else {
+        init(SYCL_DEVICES_TOP_LEVEL_ZERO);
+    }
+
+    int64_t total_vram = 0;
+
+    for (int i = 0; i < device_count; ++i) {
+        int id = get_device_id(i);
+        device_infos[id].vmm = 0;
+        dpct::device_info prop;
+        dpct::get_device_info(
+            prop, dpct::dev_mgr::instance().get_device(id));
+
+        // continue data, so use device index
+        default_tensor_split[i] = total_vram;
+        total_vram += prop.get_global_mem_size();
+
+        device_infos[id].cc =
+            100 * prop.get_major_version() + 10 * prop.get_minor_version();
+    }
+
+    // continue data, so use device index
+    for (int i = 0; i < device_count; ++i) {
+        default_tensor_split[i] /= total_vram;
+    }
+
+    print_gpu_device_list();
+}
+
+const char *ggml_sycl_device_info::devices_list() {
+    return device_list.c_str();
+}
+
+int ggml_sycl_device_info::get_device_id(int device_index) {
+    if (device_index < device_count) {
+        return ids.at(device_index);
+    } else {
+        std::cerr << __func__ << ":SYCL device:" << device_index
+                  << " is out of range:[" << devices_list() << "]" << std::endl;
+        std::exit(1);
+    }
+}
+
+int ggml_sycl_device_info::hw_family(int id) {
+    return device_infos[id].hw_family;
+}
+
+static inline bool env_existed(const char *env_name) {
+     char *user_device_string = getenv(env_name);
+     return user_device_string!=NULL;
+}
--- a/ggml/src/ggml-sycl/sycl_device.hpp
+++ b/ggml/src/ggml-sycl/sycl_device.hpp
@ -0,0 +1,83 @@
+#ifndef SYCL_DEVICE_HPP
+#define SYCL_DEVICE_HPP
+
+#include <algorithm>
+#include <stdio.h>
+#include <vector>
+
+
+#include <sycl/sycl.hpp>
+#include "dpct/helper.hpp"
+
+#include "ggml-sycl.h"
+#include "presets.hpp"
+// #include "common.hpp"
+
+enum ggml_sycl_backend_device_filter {
+  SYCL_ALL_DEVICES = 0,
+  SYCL_DEVICES_TOP_LEVEL_ZERO,
+  SYCL_VISIBLE_DEVICES
+};
+
+struct sycl_device_info {
+    int     cc;                 // compute capability
+    // int     nsm;                // number of streaming multiprocessors
+    // size_t  smpb;               // max. shared memory per block
+    bool    vmm;                // virtual memory support
+    size_t  total_vram;
+
+    int id;
+    sycl::device device;
+    int max_compute_units;
+    int max_work_group_sizes;
+    int hw_family;
+    sycl::context ctx;
+    sycl::queue * qptrs[GGML_SYCL_MAX_STREAMS] = { nullptr };
+};
+
+struct ggml_sycl_device_info {
+    int device_count;
+    bool oneapi_device_selector_existed = false;
+    bool sycl_visible_devices_existed = false;
+    std::vector<int> ids;
+    std::vector<sycl::device> devices;
+    sycl::queue *first_queue;
+    std::string device_list;
+    sycl::context co_ctx;
+
+    sycl_device_info device_infos[GGML_SYCL_MAX_DEVICES];
+    std::array<float, GGML_SYCL_MAX_DEVICES> default_tensor_split = {};
+
+    ggml_sycl_device_info();
+    void init(ggml_sycl_backend_device_filter device_filter);
+
+    void print_gpu_device_list();
+    int work_group_size(int device_id);
+    bool is_allowed_device(int device_id);
+    const char* devices_list();
+    int get_device_id(int device_index);
+    int hw_family(int device_id);
+
+    sycl::queue *_create_queue_ptr(sycl::device device); //internal API to hide dpct API.
+    void create_context_for_group_gpus();
+    sycl::queue *create_queue_for_device(sycl::device &device);
+    sycl::queue *create_queue_for_device_id(int device_id);
+    int get_device_index(int device_id);
+    void create_context_for_devices();
+    void init_allow_devices();
+    void detect_all_sycl_device_list();
+    void detect_sycl_visible_device_list();
+    void detect_sycl_gpu_list_with_max_cu();
+    int get_device_count();
+    bool is_ext_oneapi_device(const sycl::device &dev);
+    void add_device_info(int id);
+    std::vector<sycl::device> get_devices();
+    std::vector<int> get_sycl_visible_devices();
+
+    sycl::context &get_co_ctx() { return co_ctx; }
+
+};
+
+static inline bool env_existed(const char *env_name);
+
+#endif // SYCL_DEVICE_HPP
--- a/ggml/src/ggml-sycl/sycl_hw.cpp
+++ b/ggml/src/ggml-sycl/sycl_hw.cpp
@ -8,8 +8,14 @@ SYCL_HW_FAMILY get_device_family(sycl::device *device_ptr) {
  auto id = device_ptr->get_info<sycl::ext::intel::info::device::device_id>();
  auto id_prefix = id & 0xff00;

+  const std::vector<int> Xe_ARC = {0x5600, 0x4f00};
+  const std::vector<int> Xe_Iris_IDs = {0x4900, 0xa700};
+  const std::vector<int> UHD_IDs = {0x4600};
+
  if (is_in_vector(Xe_Iris_IDs, id_prefix) or is_in_vector(UHD_IDs, id_prefix)) {
    return SYCL_HW_FAMILY_INTEL_IGPU;
+  } else if (is_in_vector(Xe_ARC, id_prefix)) {
+    return SYCL_HW_FAMILY_INTEL_ARC;
  } else {
    std::cerr << "No support PCI_ID: " << std::hex << id << std::endl;
    return SYCL_HW_FAMILY_UNKNOWN;
--- a/ggml/src/ggml-sycl/sycl_hw.hpp
+++ b/ggml/src/ggml-sycl/sycl_hw.hpp
@ -8,13 +8,10 @@

 #include <sycl/sycl.hpp>

-// const int Xe_ARC[] = {0x5600, 0x4f};
-const std::vector<int> Xe_Iris_IDs = {0x4900, 0xa700};
-const std::vector<int> UHD_IDs = {0x4600};
-
 enum SYCL_HW_FAMILY {
  SYCL_HW_FAMILY_UNKNOWN = -1,
-  SYCL_HW_FAMILY_INTEL_IGPU = 0
+  SYCL_HW_FAMILY_INTEL_IGPU = 0,
+  SYCL_HW_FAMILY_INTEL_ARC = 1
 };

 bool is_in_vector(std::vector<int> &vec, int item);