This commit is contained in:
Neo Zhang Jianyu 2024-11-04 22:23:44 +00:00 committed by GitHub
commit 29abe69ef9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 58 additions and 38 deletions

View file

@ -42,12 +42,32 @@
static bool g_sycl_loaded = false; static bool g_sycl_loaded = false;
void print_cpu_detail() {
sycl::device device;
device = sycl::device(sycl::cpu_selector_v);
dpct::device_info prop;
SYCL_CHECK(CHECK_TRY_ERROR(
dpct::get_device_info(prop, device)));
std::string name = std::string(prop.get_name());
name = std::regex_replace(name, std::regex("\\(R\\)"), "");
name = std::regex_replace(name, std::regex("\\(TM\\)"), "");
auto global_mem_size = prop.get_global_mem_size()/1000000;
std::string res= "[SYCL] CPU: ["+name+"] Memory: ["+std::to_string(global_mem_size)+"M]\n";
GGML_LOG_INFO("%s", res.c_str());
}
static ggml_sycl_device_info ggml_sycl_init() { static ggml_sycl_device_info ggml_sycl_init() {
ggml_sycl_device_info info = {}; ggml_sycl_device_info info = {};
print_cpu_detail();
info.device_count = dpct::dev_mgr::instance().device_count(); info.device_count = dpct::dev_mgr::instance().device_count();
if (info.device_count == 0) { if (info.device_count == 0) {
fprintf(stderr, "%s: failed to initialize " GGML_SYCL_NAME ": %s\n", __func__); GGML_LOG_ERROR("%s: failed to initialize " GGML_SYCL_NAME ": %s\n", __func__);
return info; return info;
} }
@ -55,16 +75,16 @@ static ggml_sycl_device_info ggml_sycl_init() {
int64_t total_vram = 0; int64_t total_vram = 0;
#if defined(GGML_SYCL_FORCE_MMQ) #if defined(GGML_SYCL_FORCE_MMQ)
fprintf(stderr, "%s: GGML_SYCL_FORCE_MMQ: yes\n", __func__); GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: yes\n", __func__);
#else #else
fprintf(stderr, "%s: GGML_SYCL_FORCE_MMQ: no\n", __func__); GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: no\n", __func__);
#endif #endif
#if defined(SYCL_USE_XMX) #if defined(SYCL_USE_XMX)
fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__); GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
#else #else
fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__); GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
#endif #endif
fprintf(stderr, "%s: found %d " GGML_SYCL_NAME " devices:\n", __func__, info.device_count); GGML_LOG_INFO("%s: found %d " GGML_SYCL_NAME " devices\n", __func__, info.device_count);
for (int i = 0; i < info.device_count; ++i) { for (int i = 0; i < info.device_count; ++i) {
info.devices[i].vmm = 0; info.devices[i].vmm = 0;
@ -110,7 +130,7 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
auto global_mem_size = prop.get_global_mem_size()/1000000; auto global_mem_size = prop.get_global_mem_size()/1000000;
fprintf(stderr, "|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(), GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(),
name.c_str(), version.c_str(), prop.get_max_compute_units(), name.c_str(), version.c_str(), prop.get_max_compute_units(),
prop.get_max_work_group_size(), prop.get_max_sub_group_size(), prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str()); global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
@ -120,11 +140,11 @@ void ggml_backend_sycl_print_sycl_devices() {
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n"); GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
int device_count = dpct::dev_mgr::instance().device_count(); int device_count = dpct::dev_mgr::instance().device_count();
std::map<std::string, size_t> DeviceNums; std::map<std::string, size_t> DeviceNums;
fprintf(stderr, "found %d SYCL devices:\n", device_count); GGML_LOG_INFO("found %d SYCL devices:\n", device_count);
fprintf(stderr, "| | | | |Max | |Max |Global | |\n"); GGML_LOG_INFO("| | | | |Max | |Max |Global | |\n");
fprintf(stderr, "| | | | |compute|Max work|sub |mem | |\n"); GGML_LOG_INFO("| | | | |compute|Max work|sub |mem | |\n");
fprintf(stderr, "|ID| Device Type| Name|Version|units |group |group|size | Driver version|\n"); GGML_LOG_INFO("|ID| Device Type| Name|Version|units |group |group|size | Driver version|\n");
fprintf(stderr, "|--|-------------------|---------------------------------------|-------|-------|--------|-----|-------|---------------------|\n"); GGML_LOG_INFO("|--|-------------------|---------------------------------------|-------|-------|--------|-----|-------|---------------------|\n");
for (int id = 0; id < device_count; ++id) { for (int id = 0; id < device_count; ++id) {
sycl::device device = dpct::dev_mgr::instance().get_device(id); sycl::device device = dpct::dev_mgr::instance().get_device(id);
sycl::backend backend = device.get_backend(); sycl::backend backend = device.get_backend();
@ -154,22 +174,22 @@ static void ggml_check_sycl() try {
static bool initialized = false; static bool initialized = false;
if (!initialized) { if (!initialized) {
fprintf(stderr, "[SYCL] call ggml_check_sycl\n"); GGML_LOG_DEBUG("[SYCL] call ggml_check_sycl\n");
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0); g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
fprintf(stderr, "%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug); GGML_LOG_INFO("%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
#if defined(GGML_SYCL_F16) #if defined(GGML_SYCL_F16)
fprintf(stderr, "%s: GGML_SYCL_F16: yes\n", __func__); GGML_LOG_INFO("%s: GGML_SYCL_F16: yes\n", __func__);
#else #else
fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__); GGML_LOG_INFO("%s: GGML_SYCL_F16: no\n", __func__);
#endif #endif
/* NOT REMOVE, keep it for next optimize for XMX. /* NOT REMOVE, keep it for next optimize for XMX.
#if defined(SYCL_USE_XMX) #if defined(SYCL_USE_XMX)
fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__); GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
#else #else
fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__); GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
#endif #endif
*/ */
@ -205,7 +225,7 @@ inline void check_allow_gpu_index(const int device_index) {
__func__, __func__,
device_index, device_index,
ggml_sycl_info().device_count - 1); ggml_sycl_info().device_count - 1);
fprintf(stderr, "%s\n", error_buf); GGML_LOG_ERROR("%s\n", error_buf);
assert(false); assert(false);
} }
} }
@ -475,7 +495,7 @@ ggml_backend_sycl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
SYCL_CHECK(CHECK_TRY_ERROR(dev_ptr = (void *)sycl::malloc_device( SYCL_CHECK(CHECK_TRY_ERROR(dev_ptr = (void *)sycl::malloc_device(
size, *stream))); size, *stream)));
if (!dev_ptr) { if (!dev_ptr) {
fprintf(stderr, "%s: can't malloc %lu Bytes memory on device", __func__, size); GGML_LOG_ERROR("%s: can't malloc %lu Bytes memory on device\n", __func__, size);
return nullptr; return nullptr;
} }
ggml_backend_sycl_buffer_context * ctx = new ggml_backend_sycl_buffer_context(buft_ctx->device, dev_ptr, buft_ctx->stream); ggml_backend_sycl_buffer_context * ctx = new ggml_backend_sycl_buffer_context(buft_ctx->device, dev_ptr, buft_ctx->stream);
@ -1142,7 +1162,7 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
CHECK_TRY_ERROR(ptr = (void *)sycl::malloc_device( CHECK_TRY_ERROR(ptr = (void *)sycl::malloc_device(
look_ahead_size, *qptr))); look_ahead_size, *qptr)));
if (!ptr) { if (!ptr) {
fprintf(stderr, "%s: can't malloc %lu Bytes memory on device", __func__, look_ahead_size); GGML_LOG_ERROR("%s: can't malloc %lu Bytes memory on device\n", __func__, look_ahead_size);
return nullptr; return nullptr;
} }
@ -1150,7 +1170,7 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
pool_size += look_ahead_size; pool_size += look_ahead_size;
#ifdef DEBUG_SYCL_MALLOC #ifdef DEBUG_SYCL_MALLOC
fprintf(stderr, "%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, requested %u MB\n", __func__, id, nnz, GGML_LOG_DEBUG("%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, requested %u MB\n", __func__, id, nnz,
(uint32_t)(max_size/1024/1024), (uint32_t)(g_sycl_pool_size[id]/1024/1024), (uint32_t)(size/1024/1024)); (uint32_t)(max_size/1024/1024), (uint32_t)(g_sycl_pool_size[id]/1024/1024), (uint32_t)(size/1024/1024));
#endif #endif
// GGML_SYCL_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr); // GGML_SYCL_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr);
@ -1166,7 +1186,7 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
return; return;
} }
} }
fprintf(stderr, "WARNING: sycl buffer pool full, increase MAX_sycl_BUFFERS\n"); GGML_LOG_WARN("WARNING: sycl buffer pool full, increase MAX_sycl_BUFFERS\n");
SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, *qptr))); SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, *qptr)));
pool_size -= size; pool_size -= size;
} }
@ -2940,7 +2960,7 @@ static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_te
break; break;
default: default:
// TODO: k-quants // TODO: k-quants
fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type)); GGML_LOG_ERROR("%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
GGML_ABORT("fatal error"); GGML_ABORT("fatal error");
break; break;
} }
@ -2968,7 +2988,7 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
op()(ctx, src0, src1, dst, (const int16_t *)src0_dd, (const int16_t *)src1_dd, (int16_t *)dst_dd, op()(ctx, src0, src1, dst, (const int16_t *)src0_dd, (const int16_t *)src1_dd, (int16_t *)dst_dd,
main_stream); main_stream);
} else { } else {
fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__, GGML_LOG_ERROR("%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type)); ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type));
GGML_ABORT("fatal error"); GGML_ABORT("fatal error");
} }
@ -4592,7 +4612,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
} else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) { } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
ggml_cpy_i32_i32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); ggml_cpy_i32_i32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
} else { } else {
fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__, GGML_LOG_ERROR("%s: unsupported type combination (%s to %s)\n", __func__,
ggml_type_name(src0->type), ggml_type_name(src1->type)); ggml_type_name(src0->type), ggml_type_name(src1->type));
GGML_ABORT("fatal error"); GGML_ABORT("fatal error");
} }
@ -4657,7 +4677,7 @@ void ggml_sycl_set_main_device(const int main_device) try {
dpct::device_info prop; dpct::device_info prop;
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info( SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
prop, dpct::dev_mgr::instance().get_device(main_device)))); prop, dpct::dev_mgr::instance().get_device(main_device))));
fprintf(stderr, "Using device %d (%s) as main device\n", GGML_LOG_DEBUG("Using device %d (%s) as main device\n",
main_device, prop.get_name()); main_device, prop.get_name());
} }
} }
@ -4964,7 +4984,7 @@ static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_
#endif #endif
bool ok = ggml_sycl_compute_forward(*sycl_ctx, node); bool ok = ggml_sycl_compute_forward(*sycl_ctx, node);
if (!ok) { if (!ok) {
fprintf(stderr, "%s: error: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op)); GGML_LOG_ERROR("%s: error: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op));
} }
GGML_ASSERT(ok); GGML_ASSERT(ok);
} }
@ -5435,7 +5455,7 @@ ggml_backend_t ggml_backend_sycl_init(int device) {
ggml_backend_sycl_context * ctx = new ggml_backend_sycl_context(device); ggml_backend_sycl_context * ctx = new ggml_backend_sycl_context(device);
if (ctx == nullptr) { if (ctx == nullptr) {
fprintf(stderr, "%s: error: failed to allocate context\n", __func__); GGML_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
return nullptr; return nullptr;
}; };

View file

@ -28,9 +28,8 @@ void* ggml_sycl_host_malloc(size_t size) try {
if (err != 0) { if (err != 0) {
// clear the error // clear the error
fprintf( GGML_LOG_ERROR(
stderr, "Error: failed to allocate %.2f MB of pinned memory: %s\n",
"WARNING: failed to allocate %.2f MB of pinned memory: %s\n",
size / 1024.0 / 1024.0, size / 1024.0 / 1024.0,
"syclGetErrorString is not supported"); "syclGetErrorString is not supported");
return nullptr; return nullptr;

View file

@ -18,6 +18,7 @@
#include "dpct/helper.hpp" #include "dpct/helper.hpp"
#include "ggml-sycl.h" #include "ggml-sycl.h"
#include "../src/ggml-impl.h"
#include "presets.hpp" #include "presets.hpp"
#if GGML_SYCL_DNNL #if GGML_SYCL_DNNL
#include "dnnl.hpp" #include "dnnl.hpp"
@ -31,11 +32,13 @@
void* ggml_sycl_host_malloc(size_t size); void* ggml_sycl_host_malloc(size_t size);
void ggml_sycl_host_free(void* ptr); void ggml_sycl_host_free(void* ptr);
#define GGML_SYCL_ERROR(string) GGML_LOG_ERROR("%s\nException caught at file:%s, line:%d, func:%s\n", string, __FILE__, __LINE__ ,__func__)
static int g_ggml_sycl_debug = 0; static int g_ggml_sycl_debug = 0;
#define GGML_SYCL_DEBUG(...) \ #define GGML_SYCL_DEBUG(...) \
do { \ do { \
if (g_ggml_sycl_debug) \ if (g_ggml_sycl_debug) \
fprintf(stderr, __VA_ARGS__); \ GGML_LOG_DEBUG(__VA_ARGS__); \
} while (0) } while (0)
#define CHECK_TRY_ERROR(expr) \ #define CHECK_TRY_ERROR(expr) \
@ -44,9 +47,7 @@ static int g_ggml_sycl_debug = 0;
expr; \ expr; \
return dpct::success; \ return dpct::success; \
} catch (std::exception const& e) { \ } catch (std::exception const& e) { \
std::cerr << e.what() << "\nException caught at file:" << __FILE__ \ GGML_SYCL_ERROR(e.what()); \
<< ", line:" << __LINE__ << ", func:" << __func__ \
<< std::endl; \
return dpct::default_error; \ return dpct::default_error; \
} \ } \
}() }()
@ -102,8 +103,8 @@ static void crash() {
const char* file, const char* file,
const int line, const int line,
const char* msg) { const char* msg) {
fprintf(stderr, "SYCL error: %s: %s\n", stmt, msg); GGML_LOG_ERROR("SYCL error: %s: %s\n", stmt, msg);
fprintf(stderr, " in function %s at %s:%d\n", func, file, line); GGML_LOG_ERROR(" in function %s at %s:%d\n", func, file, line);
GGML_ABORT("SYCL error"); GGML_ABORT("SYCL error");
} }