Revert "[SYCL] fix error when set main gpu to non-zero (#5901)" (#5918)

This reverts commit ceca1aef07.
This commit is contained in:
Neo Zhang Jianyu 2024-03-07 19:14:49 +08:00 committed by Jianyu Zhang
parent c810764c7e
commit 979373c17f
3 changed files with 91 additions and 96 deletions

View file

@@ -3636,31 +3636,12 @@ class sycl_gpu_mgr {
int work_group_size = 0; int work_group_size = 0;
std::string gpus_list = ""; std::string gpus_list = "";
/*
Use all GPU with same top max compute units
*/
sycl_gpu_mgr() { sycl_gpu_mgr() {
detect_sycl_gpu_list_with_max_cu(); detect_sycl_gpu_list_with_max_cu();
get_allow_gpus(); get_allow_gpus();
create_context_with_gpus(); create_context_with_gpus();
} }
/*
Use the assigned GPU as only one
*/
sycl_gpu_mgr(int main_gpu_id) {
sycl::device device = dpct::dev_mgr::instance().get_device(main_gpu_id);
dpct::device_info prop;
dpct::get_device_info(prop, device);
gpus.push_back(main_gpu_id);
devices.push_back(device);
work_group_size = prop.get_max_work_group_size();
max_compute_units = prop.get_max_compute_units();
get_allow_gpus();
create_context_with_gpus();
}
void create_context_with_gpus() { void create_context_with_gpus() {
sycl::context ctx = sycl::context(devices); sycl::context ctx = sycl::context(devices);
assert(gpus.size() > 0); assert(gpus.size() > 0);
@@ -3676,7 +3657,7 @@ class sycl_gpu_mgr {
gpus_list += std::to_string(gpus[i]); gpus_list += std::to_string(gpus[i]);
gpus_list += ","; gpus_list += ",";
} }
if (gpus_list.length() > 1) { if (gpus_list.length() > 2) {
gpus_list.pop_back(); gpus_list.pop_back();
} }
} }
@@ -3725,8 +3706,8 @@ class sycl_gpu_mgr {
if (gpus[i] == id) if (gpus[i] == id)
return i; return i;
} }
printf("miss to get device index by id=%d\n", id); assert(false);
GGML_ASSERT(false); return -1;
} }
int get_next_index(int id) { int get_next_index(int id) {
@@ -3735,7 +3716,8 @@ class sycl_gpu_mgr {
if (gpus[i] == id) if (gpus[i] == id)
return i; return i;
} }
GGML_ASSERT(false); assert(false);
return -1;
} }
}; };
@@ -3744,7 +3726,6 @@ static int g_device_count = -1;
static int g_all_sycl_device_count = -1; static int g_all_sycl_device_count = -1;
static int g_main_device = -1; static int g_main_device = -1;
static int g_main_device_id = -1; static int g_main_device_id = -1;
static bool g_ggml_backend_sycl_buffer_type_initialized = false;
static std::array<float, GGML_SYCL_MAX_DEVICES> g_default_tensor_split = {}; static std::array<float, GGML_SYCL_MAX_DEVICES> g_default_tensor_split = {};
@@ -13334,6 +13315,7 @@ void ggml_backend_sycl_print_sycl_devices() {
} }
void print_gpu_device_list() { void print_gpu_device_list() {
<<<<<<< HEAD
GGML_ASSERT(g_sycl_gpu_mgr); GGML_ASSERT(g_sycl_gpu_mgr);
char* hint=NULL; char* hint=NULL;
@@ -13346,6 +13328,12 @@ void print_gpu_device_list() {
g_sycl_gpu_mgr->get_gpu_count(), g_sycl_gpu_mgr->get_gpu_count(),
g_sycl_gpu_mgr->gpus_list.c_str(), g_sycl_gpu_mgr->gpus_list.c_str(),
g_sycl_gpu_mgr->max_compute_units); g_sycl_gpu_mgr->max_compute_units);
=======
fprintf(stderr, "detect %d SYCL GPUs: [%s] with Max compute units:%d\n",
g_sycl_gpu_mgr->get_gpu_count(),
g_sycl_gpu_mgr->gpus_list.c_str(),
g_sycl_gpu_mgr->max_compute_units);
>>>>>>> 89fb735 (Revert "[SYCL] fix error when set main gpu to non-zero (#5901)" (#5918))
} }
int get_sycl_env(const char *env_name, int default_val) { int get_sycl_env(const char *env_name, int default_val) {
@@ -13381,15 +13369,6 @@ void ggml_init_sycl() try {
#else #else
fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__); fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__);
#endif #endif
/* NOT REMOVE, keep it for next optimize for XMX.
#if defined(SYCL_USE_XMX)
fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
#else
fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
#endif
*/
if (CHECK_TRY_ERROR(g_all_sycl_device_count = if (CHECK_TRY_ERROR(g_all_sycl_device_count =
dpct::dev_mgr::instance().device_count()) != 0) { dpct::dev_mgr::instance().device_count()) != 0) {
initialized = true; initialized = true;
@@ -13398,18 +13377,29 @@ void ggml_init_sycl() try {
} }
GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES); GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
ggml_backend_sycl_print_sycl_devices(); ggml_backend_sycl_print_sycl_devices();
<<<<<<< HEAD
initialized = true; initialized = true;
g_sycl_loaded = true; g_sycl_loaded = true;
} }
=======
if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
g_device_count = g_sycl_gpu_mgr->get_gpu_count(); g_device_count = g_sycl_gpu_mgr->get_gpu_count();
g_work_group_size = g_sycl_gpu_mgr->work_group_size; g_work_group_size = g_sycl_gpu_mgr->work_group_size;
print_gpu_device_list();
>>>>>>> 89fb735 (Revert "[SYCL] fix error when set main gpu to non-zero (#5901)" (#5918))
int64_t total_vram = 0; int64_t total_vram = 0;
/* NOT REMOVE, keep it for next optimize for XMX.
#if defined(SYCL_USE_XMX)
fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
#else
fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
#endif
*/
for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) { for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
g_device_caps[id].vmm = 0; g_device_caps[id].vmm = 0;
g_device_caps[id].device_id = -1; g_device_caps[id].device_id = -1;
@@ -13452,6 +13442,10 @@ void ggml_init_sycl() try {
// create sycl handle // create sycl handle
SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream)); SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
} }
initialized = true;
g_sycl_loaded = true;
}
} }
catch (sycl::exception const &exc) { catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__ std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -16906,24 +16900,22 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
/* .is_host = */ nullptr, /* .is_host = */ nullptr,
}; };
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) { ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
if (device_index>=g_device_count or device_index<0) {
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
device_index, g_device_count-1);
GGML_ASSERT(device_index<g_device_count);
}
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES]; static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES];
if (!g_ggml_backend_sycl_buffer_type_initialized) { static bool ggml_backend_sycl_buffer_type_initialized = false;
if (!ggml_backend_sycl_buffer_type_initialized) {
for (int i = 0; i < g_device_count; i++) { for (int i = 0; i < g_device_count; i++) {
ggml_backend_sycl_buffer_types[i] = { ggml_backend_sycl_buffer_types[i] = {
/* .iface = */ ggml_backend_sycl_buffer_type_interface, /* .iface = */ ggml_backend_sycl_buffer_type_interface,
/* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(g_sycl_gpu_mgr->gpus[i])}, /* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(g_sycl_gpu_mgr->gpus[i])},
}; };
} }
g_ggml_backend_sycl_buffer_type_initialized = true; ggml_backend_sycl_buffer_type_initialized = true;
} }
return &ggml_backend_sycl_buffer_types[device_index];
return &ggml_backend_sycl_buffer_types[device];
} }
// sycl split buffer type // sycl split buffer type
@@ -17672,6 +17664,7 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
return g_sycl_gpu_mgr->get_index(device_id); return g_sycl_gpu_mgr->get_index(device_id);
} }
<<<<<<< HEAD
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index) { GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index) {
return g_sycl_gpu_mgr->gpus[device_index]; return g_sycl_gpu_mgr->gpus[device_index];
} }
@@ -17704,6 +17697,8 @@ GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode() {
g_ggml_backend_sycl_buffer_type_initialized = false; g_ggml_backend_sycl_buffer_type_initialized = false;
} }
=======
>>>>>>> 89fb735 (Revert "[SYCL] fix error when set main gpu to non-zero (#5901)" (#5918))
extern "C" int ggml_backend_sycl_reg_devices(); extern "C" int ggml_backend_sycl_reg_devices();
int ggml_backend_sycl_reg_devices() { int ggml_backend_sycl_reg_devices() {

View file

@@ -29,9 +29,13 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split); GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total); GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id); GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
<<<<<<< HEAD
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index); GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id); GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode(); GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
=======
>>>>>>> 89fb735 (Revert "[SYCL] fix error when set main gpu to non-zero (#5901)" (#5918))
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@@ -3750,14 +3750,6 @@ static bool llm_load_tensors(
model.main_gpu = main_gpu; model.main_gpu = main_gpu;
model.n_gpu_layers = n_gpu_layers; model.n_gpu_layers = n_gpu_layers;
#ifdef GGML_USE_SYCL
if (split_mode == LLAMA_SPLIT_MODE_NONE) {
ggml_backend_sycl_set_single_device(main_gpu);
//SYCL use device index (0, 1, 2), instead if device id.
main_gpu = ggml_backend_sycl_get_device_index(main_gpu);
}
#endif
const int64_t n_layer = hparams.n_layer; const int64_t n_layer = hparams.n_layer;
const int64_t i_gpu_start = std::max((int64_t) hparams.n_layer - n_gpu_layers, (int64_t) 0); const int64_t i_gpu_start = std::max((int64_t) hparams.n_layer - n_gpu_layers, (int64_t) 0);
@@ -12278,12 +12270,16 @@ struct llama_context * llama_new_context_with_model(
ctx->backends.push_back(backend); ctx->backends.push_back(backend);
} else { } else {
// LLAMA_SPLIT_LAYER requires a backend for each GPU // LLAMA_SPLIT_LAYER requires a backend for each GPU
for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) { <<<<<<< HEAD
ggml_backend_t backend = ggml_backend_sycl_init(i); =======
if (backend == nullptr) {
int id_list[GGML_SYCL_MAX_DEVICES]; int id_list[GGML_SYCL_MAX_DEVICES];
ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES); ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d)backend\n", __func__, id_list[i], i); >>>>>>> 89fb735 (Revert "[SYCL] fix error when set main gpu to non-zero (#5901)" (#5918))
for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
int device_id = id_list[i];
ggml_backend_t backend = ggml_backend_sycl_init(i);
if (backend == nullptr) {
LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d)backend\n", __func__, device_id, i);
llama_free(ctx); llama_free(ctx);
return nullptr; return nullptr;
} }