refactor device in sycl_device, restore ctx in create_queue

arthw 2024-07-30 23:49:34 +08:00
parent e66117076c
commit d5380f3af2
9 changed files with 495 additions and 481 deletions

View file

@@ -48,7 +48,7 @@ void ggml_sycl_get_device_description(int device, char * description, size_t d
bool ggml_backend_is_sycl(ggml_backend_t backend);
int ggml_backend_sycl_get_device(ggml_backend_t backend);
static bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer);
static bool ggml_backend_buffer_is_sycl(ggml_backend_buffer_t buffer);
void dev2dev_memcpy(sycl::queue &q_dst, sycl::queue &q_src, void *ptr_dst,
const void *ptr_src, size_t size) {
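The body of dev2dev_memcpy is outside this hunk. As a rough sketch only (not the implementation in this commit), a copy of this shape can be written in plain SYCL when both pointers are USM allocations whose queues share one sycl::context, which is exactly what the shared-context changes later in this commit set up:
// Hypothetical sketch: a device-to-device copy expressed as a plain memcpy on
// the destination queue; legal when both USM allocations belong to one context.
static void dev2dev_memcpy_sketch(sycl::queue &q_dst, sycl::queue &q_src,
                                  void *ptr_dst, const void *ptr_src, size_t size) {
    q_src.wait();                          // make sure the source data is ready
    q_dst.memcpy(ptr_dst, ptr_src, size);  // cross-device copy within one context
    q_dst.wait();
}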
@@ -2279,11 +2279,11 @@ static int64_t get_row_rounding(ggml_type type, const std::array<float, GGML_SYC
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
int id = ggml_backend_sycl_get_device_id(i);
if (tensor_split[i] < (i + 1 < ggml_sycl_info().device_count ? tensor_split[i + 1] : 1.0f)) {
if (min_compute_capability > ggml_sycl_info().devices[id].cc) {
min_compute_capability = ggml_sycl_info().devices[id].cc;
if (min_compute_capability > ggml_sycl_info().device_infos[id].cc) {
min_compute_capability = ggml_sycl_info().device_infos[id].cc;
}
if (max_compute_capability < ggml_sycl_info().devices[id].cc) {
max_compute_capability = ggml_sycl_info().devices[id].cc;
if (max_compute_capability < ggml_sycl_info().device_infos[id].cc) {
max_compute_capability = ggml_sycl_info().device_infos[id].cc;
}
}
}
@@ -2680,17 +2680,14 @@ static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) {
}
#ifdef NDEBUG
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
int id = ggml_backend_sycl_get_device_id(i);
for (auto &id: ggml_sycl_info().ids) {
SYCL_CHECK(ggml_sycl_set_device(id));
}
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
int id = ggml_backend_sycl_get_device_id(i);
for (auto &id: ggml_sycl_info().ids) {
SYCL_CHECK(ggml_sycl_set_device(id));
for (int i_other = 0; i_other < ggml_sycl_info().device_count; ++i_other) {
int id_other = ggml_backend_sycl_get_device_id(i_other);
for (auto &id_other: ggml_sycl_info().ids) {
if (id == id_other) {
continue;
}
@@ -2843,7 +2840,6 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
} else {
dev[id].src1_ddf = dev[id].src1_ddf_alloc.alloc(ctx.pool(id), ggml_nelements(src1));
}
if (convert_src1_to_q8_1) {
dev[id].src1_ddq = dev[id].src1_ddq_alloc.alloc(ctx.pool(id), nrows1*src1_padded_col_size*q8_1_ts/q8_1_bs);
@@ -3165,8 +3161,13 @@ static void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * s
static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
// log_tensor_with_cnt(ctx, "log/src0", src0, -1);
// log_tensor_with_cnt(ctx, "log/src1", src1, -1);
// log_tensor_with_cnt(ctx, "log/dst0", dst, -1);
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm);
// log_tensor_with_cnt(ctx, "log/dst1", dst, -1);
GGML_SYCL_DEBUG("call %s done\n", __func__);
// exit(1);
}
static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -3417,12 +3418,12 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
continue;
}
if (min_compute_capability > ggml_sycl_info().devices[id].cc) {
min_compute_capability = ggml_sycl_info().devices[id].cc;
if (min_compute_capability > ggml_sycl_info().device_infos[id].cc) {
min_compute_capability = ggml_sycl_info().device_infos[id].cc;
}
}
} else {
min_compute_capability = ggml_sycl_info().devices[ctx.device].cc;
min_compute_capability = ggml_sycl_info().device_infos[ctx.device].cc;
}
// check data types and tensor shapes for custom matrix multiplication kernels:
@@ -4332,7 +4333,6 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_id) {
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
check_allow_device_id(device_id);
@@ -4345,7 +4345,9 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_id) {
for (int i = 0; i < ggml_sycl_info().device_count; i++) {
int id = ggml_backend_sycl_get_device_id(i);
auto & device = dpct::dev_mgr::instance().get_device(id);
queue_ptr stream = &(device.default_queue());
// queue_ptr stream = &(device.default_queue());
queue_ptr stream = ggml_sycl_info().device_infos[id].qptrs[0];
ggml_backend_sycl_buffer_types[id] = {
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
/* .context = */ new ggml_backend_sycl_buffer_type_context{id, GGML_SYCL_NAME + std::to_string(id), stream},
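For reference, a minimal sketch of the lookup this hunk switches to, assuming the device_infos table declared later in sycl_device.hpp: the buffer type now reuses stream 0 created at device-discovery time instead of dpct's per-device default queue.
// Illustrative helper, not part of the diff: fetch the pre-created primary
// stream for a device id from the global device table.
static queue_ptr primary_stream_for(int id) {
    return ggml_sycl_info().device_infos[id].qptrs[0];
}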

View file

@@ -20,12 +20,16 @@ void* ggml_sycl_host_malloc(size_t size) try {
if (getenv("GGML_SYCL_NO_PINNED") != nullptr) {
return nullptr;
}
// ggml_sycl_info().device_mgr->first_queue
void* ptr = nullptr;
// allow to use dpct::get_in_order_queue() for host malloc
dpct::err0 err = CHECK_TRY_ERROR(
ptr = (void*)sycl::malloc_host(size, dpct::get_in_order_queue()));
auto q = dpct::get_in_order_queue();
// sycl::queue q = *ggml_sycl_info().device_mgr->qptrs[0][0];
dpct::err0 err = CHECK_TRY_ERROR(
ptr = (void*)sycl::malloc_host(size, q));
// printf("zjy ggml_sycl_host_malloc ptr=%p queue=%p size=%lu \n", ptr,q, size);
if (err != 0) {
// clear the error
fprintf(
@@ -66,27 +70,6 @@ static inline int get_sycl_env(const char *env_name, int default_val) {
return user_number;
}
static inline bool env_existed(const char *env_name) {
char *user_device_string = getenv(env_name);
return user_device_string!=NULL;
}
static std::vector<int> get_sycl_visible_devices() {
static std::vector<int> device_ids;
char *devices_env = getenv("GGML_SYCL_VISIBLE_DEVICES");
if (devices_env != nullptr) {
std::string devices(devices_env);
std::replace(devices.begin(), devices.end(), ',', ' ');
std::stringstream ss(devices);
int tmp;
while (ss >> tmp) {
device_ids.push_back(tmp);
}
}
return device_ids;
}
void print_device_detail_part1(int id, sycl::device &device, std::string device_type) {
dpct::device_info prop;
@@ -193,8 +176,7 @@ static ggml_sycl_device_info ggml_sycl_init() try {
initialized = true;
}
static ggml_sycl_device_info info = {};
info.refresh_device();
static ggml_sycl_device_info info;
if (info.device_count == 0) {
fprintf(stderr, "%s: failed to initialize " GGML_SYCL_NAME ": no available device found\n",
@@ -215,288 +197,4 @@ ggml_sycl_device_info &ggml_sycl_info() {
return info;
}
//--sycl_device_mgr--
sycl_device_mgr::sycl_device_mgr(
ggml_sycl_backend_device_filter device_filter) {
switch (device_filter) {
case SYCL_DEVICES_TOP_LEVEL_ZERO:
detect_sycl_gpu_list_with_max_cu();
create_context_for_group_gpus();
break;
case SYCL_ALL_DEVICES:
detect_all_sycl_device_list();
create_context_for_devices();
break;
case SYCL_VISIBLE_DEVICES:
detect_sycl_visible_device_list();
create_context_for_devices();
break;
default:
std::cerr << "sycl_device_mgr: Invalid device_filter " << device_filter
<< std::endl;
}
init_allow_devices();
}
/*
Bind all gpus in same host with same context, for better performance in
device-to-device copy in the future.
*/
void sycl_device_mgr::create_context_for_group_gpus() {
sycl::context ctx = sycl::context(devices);
assert(device_ids.size() > 0);
first_queue = _create_queue_ptr(devices[0]);
sycl::context ctx0 = first_queue->get_context();
for (int i = 0; i < device_ids.size(); i++) {
ctxs.push_back(ctx0);
}
}
sycl::queue *sycl_device_mgr::_create_queue_ptr(sycl::device device) {
auto q = dpct::get_current_device().create_queue(device);
return q;
// _queues.push_back(q);
// return & _queues.back();
}
sycl::queue *sycl_device_mgr::create_queue_for_device(sycl::device &device) {
dpct::select_device(dpct::dev_mgr::instance().get_device_id(device));
auto qptr = _create_queue_ptr(device);
return qptr;
}
sycl::queue *sycl_device_mgr::create_queue_for_device_id(int device_id) {
int i = get_device_index(device_id);
sycl::device device = dpct::dev_mgr::instance().get_device(device_id);
return create_queue_for_device(device);
}
int sycl_device_mgr::get_device_index(int device_id) {
for (int i = 0; i < device_ids.size(); i++) {
if (device_ids[i] == device_id)
return i;
}
return -1;
}
void sycl_device_mgr::create_context_for_devices() {
for (int i = 0; i < device_ids.size(); i++) {
sycl::context ctx = sycl::context(devices[i]);
ctxs.push_back(ctx);
}
}
void sycl_device_mgr::init_allow_devices() {
device_list = "";
for (size_t i = 0; i < device_ids.size(); ++i) {
device_list += std::to_string(device_ids[i]);
device_list += ",";
}
if (device_list.length() > 1) {
device_list.pop_back();
}
}
bool sycl_device_mgr::is_allowed_device(int device_id) {
return std::find(device_ids.begin(), device_ids.end(), device_id) !=
device_ids.end();
}
void sycl_device_mgr::detect_all_sycl_device_list() try {
int device_count = dpct::dev_mgr::instance().device_count();
for (int id = 0; id < device_count; id++) {
add_device_info(id);
}
return;
} catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}
void sycl_device_mgr::detect_sycl_visible_device_list() try {
std::vector<int> sycl_devices = get_sycl_visible_devices();
int device_count = dpct::dev_mgr::instance().device_count();
for (int i = 0; i < sycl_devices.size(); i++) {
int id = sycl_devices[i];
if (id >= device_count) {
std::cerr << __func__ << ": invalid device_id:" << id
<< " from GGML_SYCL_VISIBLE_DEVICES="
<< getenv("GGML_SYCL_VISIBLE_DEVICES")
<< ", available IDs: ";
if (device_count > 1) {
std::cerr << "[0, " << device_count - 1 << "]";
} else if (device_count == 1) {
std::cerr << "[0]";
} else {
std::cerr << "[]";
}
std::cerr << std::endl;
}
add_device_info(id);
}
return;
} catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}
/*
Use all GPUs with same top max compute units
*/
void sycl_device_mgr::detect_sycl_gpu_list_with_max_cu() try {
int device_count = dpct::dev_mgr::instance().device_count();
int local_max_compute_units = 0;
for (int id = 0; id < device_count; id++) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
if (!device.is_gpu())
continue;
dpct::device_info prop;
dpct::get_device_info(prop, device);
if (local_max_compute_units < prop.get_max_compute_units())
local_max_compute_units = prop.get_max_compute_units();
}
for (int id = 0; id < device_count; id++) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
if (!device.is_gpu())
continue;
dpct::device_info prop;
dpct::get_device_info(prop, device);
if (local_max_compute_units == prop.get_max_compute_units() &&
is_ext_oneapi_device(device)) {
add_device_info(id);
}
}
return;
} catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}
int sycl_device_mgr::get_device_count() { return (int)device_ids.size(); }
bool sycl_device_mgr::is_ext_oneapi_device(const sycl::device &dev) {
sycl::backend dev_backend = dev.get_backend();
if (dev_backend == sycl::backend::ext_oneapi_level_zero ||
dev_backend == sycl::backend::ext_oneapi_cuda ||
dev_backend == sycl::backend::ext_oneapi_hip)
return true;
return false;
}
void sycl_device_mgr::add_device_info(int id) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
device_ids.push_back(id);
devices.push_back(device);
dpct::device_info prop;
dpct::get_device_info(prop, device);
work_group_sizes.push_back(prop.get_max_work_group_size());
max_compute_units.push_back(prop.get_max_compute_units());
hw_familys.push_back(get_device_family(&device));
}
//--sycl_device_mgr--
//--ggml_sycl_device_info--
void ggml_sycl_device_info::print_gpu_device_list() {
GGML_ASSERT(device_mgr);
char *hint = NULL;
if (oneapi_device_selector_existed && sycl_visible_devices_existed) {
hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s and "
"GGML_SYCL_VISIBLE_DEVICES=%s\n";
fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
getenv("ONEAPI_DEVICE_SELECTOR"),
getenv("GGML_SYCL_VISIBLE_DEVICES"));
} else if (oneapi_device_selector_existed) {
hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s\n";
fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
getenv("ONEAPI_DEVICE_SELECTOR"));
} else if (sycl_visible_devices_existed) {
hint = "detect %d SYCL devices:[%s] by GGML_SYCL_VISIBLE_DEVICES=%s\n";
fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
getenv("GGML_SYCL_VISIBLE_DEVICES"));
} else {
hint = "detect %d SYCL level-zero GPUs:[%s] with top Max compute "
"units:%d, to use any SYCL devices, set/export "
"GGML_SYCL_VISIBLE_DEVICES or ONEAPI_DEVICE_SELECTOR\n";
fprintf(stderr, hint, device_mgr->get_device_count(), devices_list(),
device_mgr->max_compute_units[0]);
}
}
int ggml_sycl_device_info::work_group_size(int device_id) {
GGML_ASSERT(device_mgr);
return device_mgr->work_group_sizes[device_id];
}
void ggml_sycl_device_info::refresh_device() {
oneapi_device_selector_existed = env_existed("ONEAPI_DEVICE_SELECTOR");
sycl_visible_devices_existed = env_existed("GGML_SYCL_VISIBLE_DEVICES");
if (!device_mgr)
delete device_mgr;
if (sycl_visible_devices_existed) {
device_mgr = new sycl_device_mgr(SYCL_VISIBLE_DEVICES);
} else if (oneapi_device_selector_existed) {
device_mgr = new sycl_device_mgr(SYCL_ALL_DEVICES);
} else {
device_mgr = new sycl_device_mgr(SYCL_DEVICES_TOP_LEVEL_ZERO);
}
device_count = device_mgr->get_device_count();
int64_t total_vram = 0;
for (int i = 0; i < device_count; ++i) {
int id = get_device_id(i);
devices[id].vmm = 0;
dpct::device_info prop;
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
prop, dpct::dev_mgr::instance().get_device(id))));
default_tensor_split[i] =
total_vram; // continue data, so use device index
total_vram += prop.get_global_mem_size();
devices[id].cc =
100 * prop.get_major_version() + 10 * prop.get_minor_version();
}
for (int i = 0; i < device_count; ++i) {
default_tensor_split[i] /=
total_vram; // continue data, so use device index
}
print_gpu_device_list();
}
bool ggml_sycl_device_info::is_allowed_device(int device_id) {
return device_mgr->is_allowed_device(device_id);
}
const char *ggml_sycl_device_info::devices_list() {
return device_mgr->device_list.c_str();
}
int ggml_sycl_device_info::get_device_id(int device_index) {
if (device_index < device_mgr->device_ids.size()) {
return device_mgr->device_ids.at(device_index);
} else {
std::cerr << __func__ << ":SYCL device:" << device_index
<< " is out of range:[" << devices_list() << "]" << std::endl;
std::exit(1);
}
}
int ggml_sycl_device_info::hw_family(int device_id) {
return device_mgr->hw_familys[device_id];
}
//--ggml_sycl_device_info--
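Together with the ggml_sycl_init hunk above, device discovery now happens in the constructor of a function-local static ggml_sycl_device_info instead of an explicit refresh_device call. A minimal usage sketch under that assumption (illustrative names, assuming the usual ggml-sycl headers):
// Sketch only: the first call to ggml_sycl_info() constructs the static info
// object (enumerating devices and creating queues); later calls reuse it.
static void example_query_devices() {
    ggml_sycl_device_info &info = ggml_sycl_info();
    fprintf(stderr, "found %d SYCL devices: [%s]\n",
            info.get_device_count(), info.devices_list());
}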

View file

@@ -21,12 +21,13 @@
#include "ggml-sycl.h"
#include "presets.hpp"
#include "sycl_hw.hpp"
#include "sycl_device.hpp"
#define GGML_COMMON_DECL_SYCL
#define GGML_COMMON_IMPL_SYCL
#include "ggml-common.h"
void* ggml_sycl_host_malloc(size_t size);
void ggml_sycl_host_free(void* ptr);
static int g_ggml_sycl_debug = 0;
@@ -86,12 +87,6 @@ enum ggml_sycl_backend_gpu_mode {
SYCL_MUL_GPU_MODE
};
enum ggml_sycl_backend_device_filter {
SYCL_ALL_DEVICES = 0,
SYCL_DEVICES_TOP_LEVEL_ZERO,
SYCL_VISIBLE_DEVICES
};
static_assert(sizeof(sycl::half) == sizeof(ggml_fp16_t), "wrong fp16 size");
static void crash() {
@@ -169,10 +164,10 @@ inline dpct::err0 ggml_sycl_set_device(const int device_id) try {
int current_device_id;
SYCL_CHECK(CHECK_TRY_ERROR(current_device_id = get_current_device_id()));
GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d, current_device_id=%d\n", device_id, current_device_id);
if (device_id == current_device_id) {
return 0;
}
GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d, current_device_id=%d\n", device_id, current_device_id);
return CHECK_TRY_ERROR(dpct::select_device(device_id));
@@ -183,67 +178,6 @@ inline dpct::err0 ggml_sycl_set_device(const int device_id) try {
std::exit(1);
}
class sycl_device_mgr {
public:
std::vector<int> device_ids;
std::vector<sycl::device> devices;
std::vector<int> max_compute_units;
std::vector<int> work_group_sizes;
std::vector<int> hw_familys;
sycl::queue *first_queue;
std::vector<sycl::queue> _queues;
std::vector<sycl::context> ctxs;
std::string device_list = "";
sycl_device_mgr(ggml_sycl_backend_device_filter device_filter);
sycl::queue *_create_queue_ptr(sycl::device device); //internal API to hide dpct API.
void create_context_for_group_gpus();
sycl::queue *create_queue_for_device(sycl::device &device);
sycl::queue *create_queue_for_device_id(int device_id);
int get_device_index(int device_id);
void create_context_for_devices();
void init_allow_devices();
bool is_allowed_device(int device_id);
void detect_all_sycl_device_list();
void detect_sycl_visible_device_list();
void detect_sycl_gpu_list_with_max_cu();
int get_device_count();
bool is_ext_oneapi_device(const sycl::device &dev);
void add_device_info(int id);
};
struct ggml_sycl_device_info {
int device_count;
bool oneapi_device_selector_existed = false;
bool sycl_visible_devices_existed = false;
struct sycl_device_info {
int cc; // compute capability
// int nsm; // number of streaming multiprocessors
// size_t smpb; // max. shared memory per block
bool vmm; // virtual memory support
size_t total_vram;
};
sycl_device_info devices[GGML_SYCL_MAX_DEVICES] = {};
std::array<float, GGML_SYCL_MAX_DEVICES> default_tensor_split = {};
sycl_device_mgr *device_mgr = NULL;
void print_gpu_device_list();
int work_group_size(int device_id);
void refresh_device();
bool is_allowed_device(int device_id);
const char* devices_list();
int get_device_id(int device_index);
int hw_family(int device_id);
};
struct ggml_sycl_pool {
virtual ~ggml_sycl_pool() = default;
@@ -309,17 +243,17 @@ struct ggml_backend_sycl_context {
queue_ptr qptrs[GGML_SYCL_MAX_DEVICES][GGML_SYCL_MAX_STREAMS] = { { nullptr } };
explicit ggml_backend_sycl_context(struct ggml_sycl_device_info &sycl_device_info, int device_id) :
device(device_id),
explicit ggml_backend_sycl_context(struct ggml_sycl_device_info &sycl_device_info, int id) :
device(id),
name(GGML_SYCL_NAME + std::to_string(device)) {
for (int i=0;i<GGML_SYCL_MAX_STREAMS; i++){
qptrs[device_id][i] = sycl_device_info.device_mgr->create_queue_for_device_id(device_id);
qptrs[id][i] = sycl_device_info.device_infos[id].qptrs[i];
}
}
queue_ptr stream(int device, int stream) {
assert(qptrs[device][stream] != nullptr);
return qptrs[device][stream];
queue_ptr stream(int id, int stream) {
assert(qptrs[id][stream] != nullptr);
return qptrs[id][stream];
}
queue_ptr stream() {
@@ -349,10 +283,10 @@ static inline void exit_with_stack_print() {
static inline int get_sycl_env(const char *env_name, int default_val);
static inline bool env_existed(const char *env_name);
void* ggml_sycl_host_malloc(size_t size);
void ggml_sycl_host_free(void* ptr);
static std::vector<int> get_sycl_visible_devices();
void ggml_backend_sycl_print_sycl_devices();
static ggml_sycl_device_info ggml_sycl_init();
ggml_sycl_device_info &ggml_sycl_info();
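A short usage sketch of the revised ggml_backend_sycl_context constructor above, assuming the per-device queues were already created by ggml_sycl_device_info; the device id 0 is a made-up example:
// Sketch only: the backend context no longer creates queues itself; it copies
// the queue pointers that ggml_sycl_device_info prepared for this device.
static void example_context_streams() {
    int id = 0;                               // hypothetical device id
    ggml_backend_sycl_context ctx(ggml_sycl_info(), id);
    queue_ptr q0   = ctx.stream(id, 0);       // explicit device id and stream
    queue_ptr qdef = ctx.stream();            // stream 0 of the context's device
    (void)q0; (void)qdef;
}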

View file

@@ -593,12 +593,14 @@ namespace dpct
typedef std::mutex mutex_type;
public:
device_ext() : sycl::device() {}
~device_ext() {
device_ext() : sycl::device(), _ctx(*this) {}
~device_ext()
{
std::lock_guard<mutex_type> lock(m_mutex);
clear_queues();
}
device_ext(const sycl::device &base) : sycl::device(base) {
device_ext(const sycl::device &base) : sycl::device(base), _ctx(*this)
{
std::lock_guard<mutex_type> lock(m_mutex);
init_queues();
}
@@ -711,9 +713,9 @@ namespace dpct
return create_in_order_queue(enable_exception_handler);
}
sycl::queue *create_queue(sycl::device device,
sycl::queue *create_queue(sycl::context context, sycl::device device,
bool enable_exception_handler = false) {
return create_in_order_queue(device, enable_exception_handler);
return create_in_order_queue(context, device, enable_exception_handler);
}
sycl::queue *create_in_order_queue(bool enable_exception_handler = false) {
@@ -722,10 +724,10 @@ namespace dpct
sycl::property::queue::in_order());
}
sycl::queue *create_in_order_queue(sycl::device device,
sycl::queue *create_in_order_queue(sycl::context context, sycl::device device,
bool enable_exception_handler = false) {
std::lock_guard<mutex_type> lock(m_mutex);
return create_queue_impl(device, enable_exception_handler,
return create_queue_impl(context, device, enable_exception_handler,
sycl::property::queue::in_order());
}
@@ -735,7 +737,8 @@ namespace dpct
return create_queue_impl(enable_exception_handler);
}
void destroy_queue(sycl::queue *&queue) {
void destroy_queue(sycl::queue *&queue)
{
std::lock_guard<mutex_type> lock(m_mutex);
_queues.erase(std::remove_if(_queues.begin(), _queues.end(),
[=](const std::shared_ptr<sycl::queue> &q) -> bool
@@ -745,14 +748,17 @@ namespace dpct
_queues.end());
queue = nullptr;
}
void set_saved_queue(sycl::queue *q) {
void set_saved_queue(sycl::queue *q)
{
std::lock_guard<mutex_type> lock(m_mutex);
_saved_queue = q;
}
sycl::queue *get_saved_queue() const {
sycl::queue *get_saved_queue() const
{
std::lock_guard<mutex_type> lock(m_mutex);
return _saved_queue;
}
sycl::context get_context() const { return _ctx; }
private:
void clear_queues() {
@@ -767,17 +773,18 @@ namespace dpct
_saved_queue = &default_queue();
}
/// Caller should acquire resource \p m_mutex before calling this
/// function.
/// Caller should acquire resource \p m_mutex before calling this function.
template <class... Properties>
sycl::queue *create_queue_impl(bool enable_exception_handler,
Properties... properties) {
Properties... properties)
{
sycl::async_handler eh = {};
if (enable_exception_handler) {
if (enable_exception_handler)
{
eh = exception_handler;
}
_queues.push_back(std::make_shared<sycl::queue>(
*this, eh,
_ctx, *this, eh,
sycl::property_list(
#ifdef DPCT_PROFILING_ENABLED
sycl::property::queue::enable_profiling(),
@@ -788,7 +795,7 @@ namespace dpct
}
template <class... Properties>
sycl::queue *create_queue_impl(sycl::device device,
sycl::queue *create_queue_impl(sycl::context context, sycl::device device,
bool enable_exception_handler,
Properties... properties) {
sycl::async_handler eh = {};
@@ -796,11 +803,11 @@ namespace dpct
eh = exception_handler;
}
_queues.push_back(std::make_shared<sycl::queue>(
device, eh,
context, device, eh,
sycl::property_list(
#ifdef DPCT_PROFILING_ENABLED
#ifdef DPCT_PROFILING_ENABLED
sycl::property::queue::enable_profiling(),
#endif
#endif
properties...)));
return _queues.back().get();
@@ -811,6 +818,7 @@ namespace dpct
}
sycl::queue *_q_in_order, *_q_out_of_order;
sycl::queue *_saved_queue;
sycl::context _ctx;
std::vector<std::shared_ptr<sycl::queue>> _queues;
mutable mutex_type m_mutex;
};
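The device_ext changes above are the "restore ctx in create_queue" half of this commit: queues are again built against an explicit sycl::context. A small standalone sketch of that constructor form (names are illustrative, not from the diff):
// Sketch: build an in-order queue on a device while pinning it to an explicit
// (possibly shared) context, mirroring create_queue_impl above.
static sycl::queue make_queue_in_ctx(const sycl::context &ctx, const sycl::device &dev) {
    sycl::async_handler handler = [](sycl::exception_list) { /* no-op for the sketch */ };
    return sycl::queue(ctx, dev, handler, sycl::property::queue::in_order());
}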

View file

@@ -1779,7 +1779,7 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
const int compute_capability = ggml_sycl_info().devices[id].cc;
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
int mmq_x, mmq_y, nwarps;
if (compute_capability >= VER_GEN13) {
@@ -1894,7 +1894,7 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
const int compute_capability = ggml_sycl_info().devices[id].cc;
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
int mmq_x, mmq_y, nwarps;
if (compute_capability >= VER_GEN13) {
@@ -2009,7 +2009,7 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
const int compute_capability = ggml_sycl_info().devices[id].cc;
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
int mmq_x, mmq_y, nwarps;
if (compute_capability >= VER_GEN13) {
@@ -2124,7 +2124,7 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
const int compute_capability = ggml_sycl_info().devices[id].cc;
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
int mmq_x, mmq_y, nwarps;
if (compute_capability >= VER_GEN13) {
@@ -2239,7 +2239,7 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
const int compute_capability = ggml_sycl_info().devices[id].cc;
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
int mmq_x, mmq_y, nwarps;
if (compute_capability >= VER_GEN13) {
@@ -2354,7 +2354,7 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
const int compute_capability = ggml_sycl_info().devices[id].cc;
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
int mmq_x, mmq_y, nwarps;
if (compute_capability >= VER_GEN13) {
@@ -2477,7 +2477,7 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
const int compute_capability = ggml_sycl_info().devices[id].cc;
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
int mmq_x, mmq_y, nwarps;
if (compute_capability >= VER_GEN13) {
@@ -2605,7 +2605,7 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
const int compute_capability = ggml_sycl_info().devices[id].cc;
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
int mmq_x, mmq_y, nwarps;
if (compute_capability >= VER_GEN13) {
@@ -2726,7 +2726,7 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
const int compute_capability = ggml_sycl_info().devices[id].cc;
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
int mmq_x, mmq_y, nwarps;
if (compute_capability >= VER_GEN13) {
@@ -2847,7 +2847,7 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
const int compute_capability = ggml_sycl_info().devices[id].cc;
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
int mmq_x, mmq_y, nwarps;
if (compute_capability >= VER_GEN13) {

View file

@@ -0,0 +1,286 @@
#include "sycl_device.hpp"
#include "sycl_hw.hpp"
void ggml_sycl_device_info::init(
ggml_sycl_backend_device_filter device_filter) {
switch (device_filter) {
case SYCL_DEVICES_TOP_LEVEL_ZERO:
detect_sycl_gpu_list_with_max_cu();
create_context_for_devices();
break;
case SYCL_ALL_DEVICES:
detect_all_sycl_device_list();
create_context_for_devices();
break;
case SYCL_VISIBLE_DEVICES:
detect_sycl_visible_device_list();
create_context_for_devices();
break;
default:
std::cerr << "ggml_sycl_device_info: Invalid device_filter " << device_filter
<< std::endl;
}
init_allow_devices();
device_count = ids.size();
}
/*
Bind all devices on the same host to the same context, for better performance of
device-to-device copies in the future.
*/
void ggml_sycl_device_info::create_context_for_devices() {
assert(devices.size() > 0);
sycl::context ctx = sycl::context(devices);
first_queue = dpct::get_current_device().create_queue(ctx, devices[0]);
co_ctx = first_queue->get_context();
}
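A standalone sketch of the idea implemented above, under the assumption that all enumerated devices belong to the same SYCL platform (a requirement for a single multi-device sycl::context):
// Sketch: one context spanning several devices; every queue created against it
// shares that context, which is what makes device-to-device USM copies simple.
static void example_shared_context(const std::vector<sycl::device> &devs) {
    sycl::context shared_ctx(devs);  // all devices must come from one platform
    std::vector<sycl::queue> queues;
    for (const auto &d : devs) {
        queues.push_back(sycl::queue(shared_ctx, d, sycl::property::queue::in_order()));
    }
}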
sycl::queue *ggml_sycl_device_info::_create_queue_ptr(sycl::device device) {
auto q = dpct::get_current_device().create_queue(co_ctx, device);
return q;
}
sycl::queue *ggml_sycl_device_info::create_queue_for_device(sycl::device &device) {
dpct::select_device(dpct::dev_mgr::instance().get_device_id(device));
auto qptr = _create_queue_ptr(device);
return qptr;
}
sycl::queue *ggml_sycl_device_info::create_queue_for_device_id(int id) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
return create_queue_for_device(device);
}
int ggml_sycl_device_info::get_device_index(int id) {
for (int i = 0; i < ids.size(); i++) {
if (ids[i] == id)
return i;
}
return -1;
}
void ggml_sycl_device_info::init_allow_devices() {
device_list = "";
for (auto & id: ids) {
device_list += std::to_string(id);
device_list += ",";
}
if (device_list.length() > 1) {
device_list.pop_back();
}
}
bool ggml_sycl_device_info::is_allowed_device(int id) {
return std::find(ids.begin(), ids.end(), id) != ids.end();
}
void ggml_sycl_device_info::detect_all_sycl_device_list() try {
int all_device_count = dpct::dev_mgr::instance().device_count();
for (int id = 0; id < all_device_count; id++) {
add_device_info(id);
}
return;
} catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}
std::vector<int> ggml_sycl_device_info::get_sycl_visible_devices() {
static std::vector<int> device_ids;
char *devices_env = getenv("GGML_SYCL_VISIBLE_DEVICES");
if (devices_env != nullptr) {
std::string devices(devices_env);
std::replace(devices.begin(), devices.end(), ',', ' ');
std::stringstream ss(devices);
int tmp;
while (ss >> tmp) {
device_ids.push_back(tmp);
}
}
return device_ids;
}
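A brief usage note for the parser above; the values are made-up examples of the expected format:
//   GGML_SYCL_VISIBLE_DEVICES="0,2"  ->  device_ids = {0, 2}
//   GGML_SYCL_VISIBLE_DEVICES="1"    ->  device_ids = {1}
// Out-of-range ids are reported by detect_sycl_visible_device_list() below.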
void ggml_sycl_device_info::detect_sycl_visible_device_list() try {
std::vector<int> sycl_devices = get_sycl_visible_devices();
int all_device_count = dpct::dev_mgr::instance().device_count();
for (auto & id: sycl_devices) {
if (id >= all_device_count) {
std::cerr << __func__ << ": invalid device_id:" << id
<< " from GGML_SYCL_VISIBLE_DEVICES="
<< getenv("GGML_SYCL_VISIBLE_DEVICES")
<< ", available IDs: ";
if (all_device_count > 1) {
std::cerr << "[0, " << all_device_count - 1 << "]";
} else if (all_device_count == 1) {
std::cerr << "[0]";
} else {
std::cerr << "[]";
}
std::cerr << std::endl;
}
add_device_info(id);
}
return;
} catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}
/*
Use all GPUs that have the same top max compute units
*/
void ggml_sycl_device_info::detect_sycl_gpu_list_with_max_cu() try {
int all_device_count = dpct::dev_mgr::instance().device_count();
int local_max_compute_units = 0;
for (int id = 0; id < all_device_count; id++) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
if (!device.is_gpu())
continue;
dpct::device_info prop;
dpct::get_device_info(prop, device);
if (local_max_compute_units < prop.get_max_compute_units())
local_max_compute_units = prop.get_max_compute_units();
}
for (int id = 0; id < all_device_count; id++) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
if (!device.is_gpu())
continue;
dpct::device_info prop;
dpct::get_device_info(prop, device);
if (local_max_compute_units == prop.get_max_compute_units() &&
is_ext_oneapi_device(device)) {
add_device_info(id);
}
}
return;
} catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}
int ggml_sycl_device_info::get_device_count() { return device_count; }
bool ggml_sycl_device_info::is_ext_oneapi_device(const sycl::device &dev) {
sycl::backend dev_backend = dev.get_backend();
if (dev_backend == sycl::backend::ext_oneapi_level_zero ||
dev_backend == sycl::backend::ext_oneapi_cuda ||
dev_backend == sycl::backend::ext_oneapi_hip)
return true;
return false;
}
void ggml_sycl_device_info::add_device_info(int id) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
dpct::device_info prop;
dpct::get_device_info(prop, device);
ids.push_back(id);
devices.push_back(device);
device_infos[id].id = id;
device_infos[id].device = device;
device_infos[id].max_work_group_sizes = prop.get_max_work_group_size();
device_infos[id].max_compute_units = prop.get_max_compute_units();
device_infos[id].hw_family = get_device_family(&device);
for (int i=0; i<GGML_SYCL_MAX_STREAMS;i++) {
device_infos[id].qptrs[i] = create_queue_for_device_id(id);
}
}
void ggml_sycl_device_info::print_gpu_device_list() {
char *hint = NULL;
if (oneapi_device_selector_existed && sycl_visible_devices_existed) {
hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s and "
"GGML_SYCL_VISIBLE_DEVICES=%s\n";
fprintf(stderr, hint, get_device_count(), devices_list(),
getenv("ONEAPI_DEVICE_SELECTOR"),
getenv("GGML_SYCL_VISIBLE_DEVICES"));
} else if (oneapi_device_selector_existed) {
hint = "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s\n";
fprintf(stderr, hint, get_device_count(), devices_list(),
getenv("ONEAPI_DEVICE_SELECTOR"));
} else if (sycl_visible_devices_existed) {
hint = "detect %d SYCL devices:[%s] by GGML_SYCL_VISIBLE_DEVICES=%s\n";
fprintf(stderr, hint, get_device_count(), devices_list(),
getenv("GGML_SYCL_VISIBLE_DEVICES"));
} else {
hint = "detect %d SYCL level-zero GPUs:[%s] with top Max compute "
"units:%d, to use any SYCL devices, set/export "
"GGML_SYCL_VISIBLE_DEVICES or ONEAPI_DEVICE_SELECTOR\n";
fprintf(stderr, hint, get_device_count(), devices_list(),
device_infos[0].max_compute_units);
}
}
int ggml_sycl_device_info::work_group_size(int id) {
GGML_ASSERT(is_allowed_device(id));
return device_infos[id].max_work_group_sizes;
}
ggml_sycl_device_info::ggml_sycl_device_info() {
oneapi_device_selector_existed = env_existed("ONEAPI_DEVICE_SELECTOR");
sycl_visible_devices_existed = env_existed("GGML_SYCL_VISIBLE_DEVICES");
if (sycl_visible_devices_existed) {
init(SYCL_VISIBLE_DEVICES);
} else if (oneapi_device_selector_existed) {
init(SYCL_ALL_DEVICES);
} else {
init(SYCL_DEVICES_TOP_LEVEL_ZERO);
}
int64_t total_vram = 0;
for (int i = 0; i < device_count; ++i) {
int id = get_device_id(i);
device_infos[id].vmm = 0;
dpct::device_info prop;
dpct::get_device_info(
prop, dpct::dev_mgr::instance().get_device(id));
// continue data, so use device index
default_tensor_split[i] = total_vram;
total_vram += prop.get_global_mem_size();
device_infos[id].cc =
100 * prop.get_major_version() + 10 * prop.get_minor_version();
}
// continue data, so use device index
for (int i = 0; i < device_count; ++i) {
default_tensor_split[i] /= total_vram;
}
print_gpu_device_list();
}
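A quick worked example of the compute-capability encoding used in the constructor above (the version numbers are illustrative):
// cc = 100 * major + 10 * minor, so a device reporting version 1.3 gets
// cc = 100 * 1 + 10 * 3 = 130, and version 8.0 would give cc = 800.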
const char *ggml_sycl_device_info::devices_list() {
return device_list.c_str();
}
int ggml_sycl_device_info::get_device_id(int device_index) {
if (device_index < device_count) {
return ids.at(device_index);
} else {
std::cerr << __func__ << ":SYCL device:" << device_index
<< " is out of range:[" << devices_list() << "]" << std::endl;
std::exit(1);
}
}
int ggml_sycl_device_info::hw_family(int id) {
return device_infos[id].hw_family;
}
static inline bool env_existed(const char *env_name) {
char *user_device_string = getenv(env_name);
return user_device_string!=NULL;
}

View file

@@ -0,0 +1,83 @@
#ifndef SYCL_DEVICE_HPP
#define SYCL_DEVICE_HPP
#include <algorithm>
#include <stdio.h>
#include <vector>
#include <sycl/sycl.hpp>
#include "dpct/helper.hpp"
#include "ggml-sycl.h"
#include "presets.hpp"
// #include "common.hpp"
enum ggml_sycl_backend_device_filter {
SYCL_ALL_DEVICES = 0,
SYCL_DEVICES_TOP_LEVEL_ZERO,
SYCL_VISIBLE_DEVICES
};
struct sycl_device_info {
int cc; // compute capability
// int nsm; // number of streaming multiprocessors
// size_t smpb; // max. shared memory per block
bool vmm; // virtual memory support
size_t total_vram;
int id;
sycl::device device;
int max_compute_units;
int max_work_group_sizes;
int hw_family;
sycl::context ctx;
sycl::queue * qptrs[GGML_SYCL_MAX_STREAMS] = { nullptr };
};
struct ggml_sycl_device_info {
int device_count;
bool oneapi_device_selector_existed = false;
bool sycl_visible_devices_existed = false;
std::vector<int> ids;
std::vector<sycl::device> devices;
sycl::queue *first_queue;
std::string device_list;
sycl::context co_ctx;
sycl_device_info device_infos[GGML_SYCL_MAX_DEVICES];
std::array<float, GGML_SYCL_MAX_DEVICES> default_tensor_split = {};
ggml_sycl_device_info();
void init(ggml_sycl_backend_device_filter device_filter);
void print_gpu_device_list();
int work_group_size(int device_id);
bool is_allowed_device(int device_id);
const char* devices_list();
int get_device_id(int device_index);
int hw_family(int device_id);
sycl::queue *_create_queue_ptr(sycl::device device); //internal API to hide dpct API.
void create_context_for_group_gpus();
sycl::queue *create_queue_for_device(sycl::device &device);
sycl::queue *create_queue_for_device_id(int device_id);
int get_device_index(int device_id);
void create_context_for_devices();
void init_allow_devices();
void detect_all_sycl_device_list();
void detect_sycl_visible_device_list();
void detect_sycl_gpu_list_with_max_cu();
int get_device_count();
bool is_ext_oneapi_device(const sycl::device &dev);
void add_device_info(int id);
std::vector<sycl::device> get_devices();
std::vector<int> get_sycl_visible_devices();
sycl::context &get_co_ctx() { return co_ctx; }
};
static inline bool env_existed(const char *env_name);
#endif // SYCL_DEVICE_HPP
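One detail worth keeping in mind when reading this header against sycl_device.cpp: device_infos (and the queues inside it) are indexed by the raw SYCL device id, while default_tensor_split is indexed by the position within ids. A short access-pattern sketch (illustrative only):
// Sketch: translate a device index into a device id before touching the
// per-id tables, mirroring the loops in the ggml_sycl_device_info constructor.
static void example_walk_devices(ggml_sycl_device_info &info) {
    for (int i = 0; i < info.get_device_count(); ++i) {
        int id = info.get_device_id(i);              // index -> id
        float split = info.default_tensor_split[i];  // indexed by device index
        int cc = info.device_infos[id].cc;           // indexed by device id
        (void)split; (void)cc;
    }
}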

View file

@@ -8,8 +8,14 @@ SYCL_HW_FAMILY get_device_family(sycl::device *device_ptr) {
auto id = device_ptr->get_info<sycl::ext::intel::info::device::device_id>();
auto id_prefix = id & 0xff00;
const std::vector<int> Xe_ARC = {0x5600, 0x4f00};
const std::vector<int> Xe_Iris_IDs = {0x4900, 0xa700};
const std::vector<int> UHD_IDs = {0x4600};
if (is_in_vector(Xe_Iris_IDs, id_prefix) or is_in_vector(UHD_IDs, id_prefix)) {
return SYCL_HW_FAMILY_INTEL_IGPU;
} else if (is_in_vector(Xe_ARC, id_prefix)) {
return SYCL_HW_FAMILY_INTEL_ARC;
} else {
std::cerr << "No support PCI_ID: " << std::hex << id << std::endl;
return SYCL_HW_FAMILY_UNKNOWN;
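The masking above keeps only the high byte of the PCI device id, so every member of a product family collapses onto one prefix. A tiny compile-time illustration; the concrete id is an example, not taken from the diff:
// Example: an id in the 0x56xx range maps onto the Xe_ARC prefix 0x5600.
constexpr int family_prefix(int pci_device_id) { return pci_device_id & 0xff00; }
static_assert(family_prefix(0x56a0) == 0x5600, "0x56xx ids share the 0x5600 prefix");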

View file

@@ -8,13 +8,10 @@
#include <sycl/sycl.hpp>
// const int Xe_ARC[] = {0x5600, 0x4f};
const std::vector<int> Xe_Iris_IDs = {0x4900, 0xa700};
const std::vector<int> UHD_IDs = {0x4600};
enum SYCL_HW_FAMILY {
SYCL_HW_FAMILY_UNKNOWN = -1,
SYCL_HW_FAMILY_INTEL_IGPU = 0
SYCL_HW_FAMILY_INTEL_IGPU = 0,
SYCL_HW_FAMILY_INTEL_ARC = 1
};
bool is_in_vector(std::vector<int> &vec, int item);
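is_in_vector is only declared here; a plausible implementation (an assumption, not copied from the repository, assuming <algorithm> is available alongside the headers above) is a simple linear search:
// Plausible sketch of the declared helper: std::find over the id list.
bool is_in_vector_sketch(std::vector<int> &vec, int item) {
    return std::find(vec.begin(), vec.end(), item) != vec.end();
}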