rename device_infos to infos
This commit is contained in:
parent
6211ac0408
commit
254a750249
5 changed files with 41 additions and 44 deletions
|
@ -2280,11 +2280,11 @@ static int64_t get_row_rounding(ggml_type type, const std::array<float, GGML_SYC
|
||||||
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
|
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
|
||||||
int id = ggml_backend_sycl_get_device_id(i);
|
int id = ggml_backend_sycl_get_device_id(i);
|
||||||
if (tensor_split[i] < (i + 1 < ggml_sycl_info().device_count ? tensor_split[i + 1] : 1.0f)) {
|
if (tensor_split[i] < (i + 1 < ggml_sycl_info().device_count ? tensor_split[i + 1] : 1.0f)) {
|
||||||
if (min_compute_capability > ggml_sycl_info().device_infos[id].cc) {
|
if (min_compute_capability > ggml_sycl_info().infos[id].cc) {
|
||||||
min_compute_capability = ggml_sycl_info().device_infos[id].cc;
|
min_compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
}
|
}
|
||||||
if (max_compute_capability < ggml_sycl_info().device_infos[id].cc) {
|
if (max_compute_capability < ggml_sycl_info().infos[id].cc) {
|
||||||
max_compute_capability = ggml_sycl_info().device_infos[id].cc;
|
max_compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3416,12 +3416,12 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (min_compute_capability > ggml_sycl_info().device_infos[id].cc) {
|
if (min_compute_capability > ggml_sycl_info().infos[id].cc) {
|
||||||
min_compute_capability = ggml_sycl_info().device_infos[id].cc;
|
min_compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
min_compute_capability = ggml_sycl_info().device_infos[ctx.device].cc;
|
min_compute_capability = ggml_sycl_info().infos[ctx.device].cc;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check data types and tensor shapes for custom matrix multiplication kernels:
|
// check data types and tensor shapes for custom matrix multiplication kernels:
|
||||||
|
@ -4342,7 +4342,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_id) {
|
||||||
if (!ggml_backend_sycl_buffer_type_initialized) {
|
if (!ggml_backend_sycl_buffer_type_initialized) {
|
||||||
for (auto & id: ggml_sycl_info().ids) {
|
for (auto & id: ggml_sycl_info().ids) {
|
||||||
auto & device = dpct::dev_mgr::instance().get_device(id);
|
auto & device = dpct::dev_mgr::instance().get_device(id);
|
||||||
queue_ptr stream = ggml_sycl_info().device_infos[id].qptrs[0];
|
queue_ptr stream = ggml_sycl_info().infos[id].qptrs[0];
|
||||||
ggml_backend_sycl_buffer_types[id] = {
|
ggml_backend_sycl_buffer_types[id] = {
|
||||||
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
|
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
|
||||||
/* .context = */ new ggml_backend_sycl_buffer_type_context{id, GGML_SYCL_NAME + std::to_string(id), stream},
|
/* .context = */ new ggml_backend_sycl_buffer_type_context{id, GGML_SYCL_NAME + std::to_string(id), stream},
|
||||||
|
|
|
@ -241,7 +241,7 @@ struct ggml_backend_sycl_context {
|
||||||
device(id),
|
device(id),
|
||||||
name(GGML_SYCL_NAME + std::to_string(device)) {
|
name(GGML_SYCL_NAME + std::to_string(device)) {
|
||||||
for (int i=0;i<GGML_SYCL_MAX_STREAMS; i++){
|
for (int i=0;i<GGML_SYCL_MAX_STREAMS; i++){
|
||||||
qptrs[id][i] = sycl_device_info.device_infos[id].qptrs[i];
|
qptrs[id][i] = sycl_device_info.infos[id].qptrs[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1779,7 +1779,7 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_id()));
|
CHECK_TRY_ERROR(id = get_current_device_id()));
|
||||||
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
|
const int compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
|
|
||||||
int mmq_x, mmq_y, nwarps;
|
int mmq_x, mmq_y, nwarps;
|
||||||
if (compute_capability >= VER_GEN13) {
|
if (compute_capability >= VER_GEN13) {
|
||||||
|
@ -1894,7 +1894,7 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_id()));
|
CHECK_TRY_ERROR(id = get_current_device_id()));
|
||||||
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
|
const int compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
|
|
||||||
int mmq_x, mmq_y, nwarps;
|
int mmq_x, mmq_y, nwarps;
|
||||||
if (compute_capability >= VER_GEN13) {
|
if (compute_capability >= VER_GEN13) {
|
||||||
|
@ -2009,7 +2009,7 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_id()));
|
CHECK_TRY_ERROR(id = get_current_device_id()));
|
||||||
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
|
const int compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
|
|
||||||
int mmq_x, mmq_y, nwarps;
|
int mmq_x, mmq_y, nwarps;
|
||||||
if (compute_capability >= VER_GEN13) {
|
if (compute_capability >= VER_GEN13) {
|
||||||
|
@ -2124,7 +2124,7 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_id()));
|
CHECK_TRY_ERROR(id = get_current_device_id()));
|
||||||
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
|
const int compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
|
|
||||||
int mmq_x, mmq_y, nwarps;
|
int mmq_x, mmq_y, nwarps;
|
||||||
if (compute_capability >= VER_GEN13) {
|
if (compute_capability >= VER_GEN13) {
|
||||||
|
@ -2239,7 +2239,7 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_id()));
|
CHECK_TRY_ERROR(id = get_current_device_id()));
|
||||||
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
|
const int compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
|
|
||||||
int mmq_x, mmq_y, nwarps;
|
int mmq_x, mmq_y, nwarps;
|
||||||
if (compute_capability >= VER_GEN13) {
|
if (compute_capability >= VER_GEN13) {
|
||||||
|
@ -2354,7 +2354,7 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_id()));
|
CHECK_TRY_ERROR(id = get_current_device_id()));
|
||||||
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
|
const int compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
|
|
||||||
int mmq_x, mmq_y, nwarps;
|
int mmq_x, mmq_y, nwarps;
|
||||||
if (compute_capability >= VER_GEN13) {
|
if (compute_capability >= VER_GEN13) {
|
||||||
|
@ -2477,7 +2477,7 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_id()));
|
CHECK_TRY_ERROR(id = get_current_device_id()));
|
||||||
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
|
const int compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
|
|
||||||
int mmq_x, mmq_y, nwarps;
|
int mmq_x, mmq_y, nwarps;
|
||||||
if (compute_capability >= VER_GEN13) {
|
if (compute_capability >= VER_GEN13) {
|
||||||
|
@ -2605,7 +2605,7 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_id()));
|
CHECK_TRY_ERROR(id = get_current_device_id()));
|
||||||
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
|
const int compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
|
|
||||||
int mmq_x, mmq_y, nwarps;
|
int mmq_x, mmq_y, nwarps;
|
||||||
if (compute_capability >= VER_GEN13) {
|
if (compute_capability >= VER_GEN13) {
|
||||||
|
@ -2726,7 +2726,7 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_id()));
|
CHECK_TRY_ERROR(id = get_current_device_id()));
|
||||||
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
|
const int compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
|
|
||||||
int mmq_x, mmq_y, nwarps;
|
int mmq_x, mmq_y, nwarps;
|
||||||
if (compute_capability >= VER_GEN13) {
|
if (compute_capability >= VER_GEN13) {
|
||||||
|
@ -2847,7 +2847,7 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_id()));
|
CHECK_TRY_ERROR(id = get_current_device_id()));
|
||||||
const int compute_capability = ggml_sycl_info().device_infos[id].cc;
|
const int compute_capability = ggml_sycl_info().infos[id].cc;
|
||||||
|
|
||||||
int mmq_x, mmq_y, nwarps;
|
int mmq_x, mmq_y, nwarps;
|
||||||
if (compute_capability >= VER_GEN13) {
|
if (compute_capability >= VER_GEN13) {
|
||||||
|
|
|
@ -22,18 +22,15 @@ void ggml_sycl_device_info::init(
|
||||||
m_device_filter = device_filter;
|
m_device_filter = device_filter;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_sycl_device_info::clear_device_infos() {
|
void ggml_sycl_device_info::clear_infos() {
|
||||||
ids.clear();
|
ids.clear();
|
||||||
devices.clear();
|
devices.clear();
|
||||||
|
|
||||||
for (int id=0;id<GGML_SYCL_MAX_DEVICES;id++) {
|
for (int id=0;id<GGML_SYCL_MAX_DEVICES;id++) {
|
||||||
device_infos[id].id = -1;
|
infos[id].id = -1;
|
||||||
device_infos[id].max_work_group_sizes = 0;
|
infos[id].max_work_group_sizes = 0;
|
||||||
device_infos[id].max_compute_units = 0;
|
infos[id].max_compute_units = 0;
|
||||||
device_infos[id].hw_family = -1;
|
infos[id].hw_family = -1;
|
||||||
// for (int i=0; i<GGML_SYCL_MAX_STREAMS;i++) {
|
|
||||||
// free(device_infos[id].qptrs[i]);
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
device_count = 0;
|
device_count = 0;
|
||||||
|
@ -43,7 +40,7 @@ void ggml_sycl_device_info::clear_device_infos() {
|
||||||
void ggml_sycl_device_info::init_single_mode(int main_gpu_id) {
|
void ggml_sycl_device_info::init_single_mode(int main_gpu_id) {
|
||||||
GGML_ASSERT(main_gpu_id<dpct::dev_mgr::instance().device_count());
|
GGML_ASSERT(main_gpu_id<dpct::dev_mgr::instance().device_count());
|
||||||
|
|
||||||
clear_device_infos();
|
clear_infos();
|
||||||
add_device_info(main_gpu_id);
|
add_device_info(main_gpu_id);
|
||||||
init_devices_dynamic_info();
|
init_devices_dynamic_info();
|
||||||
device_mode = SYCL_SINGLE_GPU_MODE;
|
device_mode = SYCL_SINGLE_GPU_MODE;
|
||||||
|
@ -217,23 +214,23 @@ void ggml_sycl_device_info::add_device_info(int id) {
|
||||||
ids.push_back(id);
|
ids.push_back(id);
|
||||||
devices.push_back(device);
|
devices.push_back(device);
|
||||||
|
|
||||||
device_infos[id].id = id;
|
infos[id].id = id;
|
||||||
device_infos[id].device = device;
|
infos[id].device = device;
|
||||||
device_infos[id].max_work_group_sizes = prop.get_max_work_group_size();
|
infos[id].max_work_group_sizes = prop.get_max_work_group_size();
|
||||||
device_infos[id].max_compute_units = prop.get_max_compute_units();
|
infos[id].max_compute_units = prop.get_max_compute_units();
|
||||||
device_infos[id].hw_family = get_device_family(&device);
|
infos[id].hw_family = get_device_family(&device);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_sycl_device_info::create_queues(int id) {
|
void ggml_sycl_device_info::create_queues(int id) {
|
||||||
for (int i=0; i<GGML_SYCL_MAX_STREAMS;i++) {
|
for (int i=0; i<GGML_SYCL_MAX_STREAMS;i++) {
|
||||||
device_infos[id].qptrs[i] = create_queue_for_device_id(id);
|
infos[id].qptrs[i] = create_queue_for_device_id(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_sycl_device_info::create_queues_for_devices() {
|
void ggml_sycl_device_info::create_queues_for_devices() {
|
||||||
for (auto &id: ids) {
|
for (auto &id: ids) {
|
||||||
for (int i=0; i<GGML_SYCL_MAX_STREAMS;i++) {
|
for (int i=0; i<GGML_SYCL_MAX_STREAMS;i++) {
|
||||||
device_infos[id].qptrs[i] = create_queue_for_device_id(id);
|
infos[id].qptrs[i] = create_queue_for_device_id(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -259,28 +256,28 @@ void ggml_sycl_device_info::print_gpu_device_list() {
|
||||||
"units:%d, to use any SYCL devices, set/export "
|
"units:%d, to use any SYCL devices, set/export "
|
||||||
"GGML_SYCL_VISIBLE_DEVICES or ONEAPI_DEVICE_SELECTOR\n";
|
"GGML_SYCL_VISIBLE_DEVICES or ONEAPI_DEVICE_SELECTOR\n";
|
||||||
fprintf(stderr, hint, get_device_count(), devices_list(),
|
fprintf(stderr, hint, get_device_count(), devices_list(),
|
||||||
device_infos[0].max_compute_units);
|
infos[0].max_compute_units);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int ggml_sycl_device_info::work_group_size(int id) {
|
int ggml_sycl_device_info::work_group_size(int id) {
|
||||||
GGML_ASSERT(is_allowed_device(id));
|
GGML_ASSERT(is_allowed_device(id));
|
||||||
return device_infos[id].max_work_group_sizes;
|
return infos[id].max_work_group_sizes;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_sycl_device_info::update_mem() {
|
void ggml_sycl_device_info::update_mem() {
|
||||||
|
|
||||||
for (int i = 0; i < GGML_SYCL_MAX_DEVICES; ++i) {
|
for (int i = 0; i < GGML_SYCL_MAX_DEVICES; ++i) {
|
||||||
device_infos[i].vmm = 0;
|
infos[i].vmm = 0;
|
||||||
default_tensor_split[i] = 0;
|
default_tensor_split[i] = 0;
|
||||||
device_infos[i].cc =0;
|
infos[i].cc =0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t total_vram = 0;
|
int64_t total_vram = 0;
|
||||||
|
|
||||||
for (int i = 0; i < device_count; ++i) {
|
for (int i = 0; i < device_count; ++i) {
|
||||||
int id = get_device_id(i);
|
int id = get_device_id(i);
|
||||||
device_infos[id].vmm = 0;
|
infos[id].vmm = 0;
|
||||||
dpct::device_info prop;
|
dpct::device_info prop;
|
||||||
dpct::get_device_info(
|
dpct::get_device_info(
|
||||||
prop, dpct::dev_mgr::instance().get_device(id));
|
prop, dpct::dev_mgr::instance().get_device(id));
|
||||||
|
@ -289,7 +286,7 @@ void ggml_sycl_device_info::update_mem() {
|
||||||
default_tensor_split[i] = total_vram;
|
default_tensor_split[i] = total_vram;
|
||||||
total_vram += prop.get_global_mem_size();
|
total_vram += prop.get_global_mem_size();
|
||||||
|
|
||||||
device_infos[id].cc =
|
infos[id].cc =
|
||||||
100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -335,7 +332,7 @@ int ggml_sycl_device_info::get_device_id(int device_index) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int ggml_sycl_device_info::hw_family(int id) {
|
int ggml_sycl_device_info::hw_family(int id) {
|
||||||
return device_infos[id].hw_family;
|
return infos[id].hw_family;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool env_existed(const char *env_name) {
|
static inline bool env_existed(const char *env_name) {
|
||||||
|
|
|
@ -54,7 +54,7 @@ struct ggml_sycl_device_info {
|
||||||
sycl::context co_ctx;
|
sycl::context co_ctx;
|
||||||
int m_device_filter;
|
int m_device_filter;
|
||||||
|
|
||||||
sycl_device_info device_infos[GGML_SYCL_MAX_DEVICES];
|
sycl_device_info infos[GGML_SYCL_MAX_DEVICES];
|
||||||
std::array<float, GGML_SYCL_MAX_DEVICES> default_tensor_split = {};
|
std::array<float, GGML_SYCL_MAX_DEVICES> default_tensor_split = {};
|
||||||
|
|
||||||
ggml_sycl_device_info(int main_gpu_id);//single device mode
|
ggml_sycl_device_info(int main_gpu_id);//single device mode
|
||||||
|
@ -62,7 +62,7 @@ struct ggml_sycl_device_info {
|
||||||
void init(ggml_sycl_backend_device_filter device_filter);
|
void init(ggml_sycl_backend_device_filter device_filter);
|
||||||
void init_single_mode(int main_gpu_id);
|
void init_single_mode(int main_gpu_id);
|
||||||
|
|
||||||
void clear_device_infos();
|
void clear_infos();
|
||||||
void print_gpu_device_list();
|
void print_gpu_device_list();
|
||||||
int work_group_size(int device_id);
|
int work_group_size(int device_id);
|
||||||
bool is_allowed_device(int device_id);
|
bool is_allowed_device(int device_id);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue