sycl : Added device and backend reg interfaces
This commit is contained in:
parent
f1b746ae97
commit
fbad686918
5 changed files with 304 additions and 124 deletions
|
@ -151,7 +151,7 @@ static std::string get_gpu_info() {
|
||||||
int count = ggml_backend_sycl_get_device_count();
|
int count = ggml_backend_sycl_get_device_count();
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
char buf[128];
|
char buf[128];
|
||||||
ggml_sycl_get_device_description(i, buf, sizeof(buf));
|
ggml_backend_sycl_get_device_description(i, buf, sizeof(buf));
|
||||||
id += buf;
|
id += buf;
|
||||||
if (i < count - 1) {
|
if (i < count - 1) {
|
||||||
id += "/";
|
id += "/";
|
||||||
|
|
|
@ -19,6 +19,8 @@ extern "C" {
|
||||||
// backend API
|
// backend API
|
||||||
GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
|
GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
|
||||||
|
|
||||||
|
GGML_API bool ggml_backend_is_sycl(ggml_backend_t backend);
|
||||||
|
|
||||||
// devide buffer
|
// devide buffer
|
||||||
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
||||||
|
|
||||||
|
@ -29,14 +31,19 @@ GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const fl
|
||||||
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
||||||
|
|
||||||
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
|
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
|
||||||
GGML_API void ggml_sycl_get_gpu_list(int *id_list, int max_len);
|
GGML_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len);
|
||||||
GGML_API void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
|
GGML_API void ggml_backend_sycl_get_device_description(int device,
|
||||||
|
char *description,
|
||||||
|
size_t description_size);
|
||||||
GGML_API int ggml_backend_sycl_get_device_count();
|
GGML_API int ggml_backend_sycl_get_device_count();
|
||||||
GGML_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
GGML_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
||||||
|
|
||||||
// SYCL doesn't support registering host memory, keep here for reference
|
// SYCL doesn't support registering host memory, keep here for reference
|
||||||
// GGML_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|
// GGML_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|
||||||
// GGML_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
|
// GGML_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
|
||||||
|
|
||||||
|
GGML_API ggml_backend_reg_t ggml_backend_sycl_reg(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -534,6 +534,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
|
||||||
#include "ggml-metal.h"
|
#include "ggml-metal.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef GGML_USE_SYCL
|
||||||
|
#include "ggml-sycl.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
struct ggml_backend_registry {
|
struct ggml_backend_registry {
|
||||||
std::vector<ggml_backend_reg_t> backends;
|
std::vector<ggml_backend_reg_t> backends;
|
||||||
std::vector<ggml_backend_dev_t> devices;
|
std::vector<ggml_backend_dev_t> devices;
|
||||||
|
@ -545,10 +549,12 @@ struct ggml_backend_registry {
|
||||||
#ifdef GGML_USE_METAL
|
#ifdef GGML_USE_METAL
|
||||||
register_backend(ggml_backend_metal_reg());
|
register_backend(ggml_backend_metal_reg());
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef GGML_USE_SYCL
|
||||||
|
register_backend(ggml_backend_sycl_reg());
|
||||||
|
#endif
|
||||||
register_backend(ggml_backend_cpu_reg());
|
register_backend(ggml_backend_cpu_reg());
|
||||||
|
|
||||||
// TODO: sycl, vulkan, kompute, cann
|
// TODO: vulkan, kompute, cann
|
||||||
}
|
}
|
||||||
|
|
||||||
void register_backend(ggml_backend_reg_t reg) {
|
void register_backend(ggml_backend_reg_t reg) {
|
||||||
|
@ -2210,6 +2216,7 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
||||||
sched->backends[b] = backends[b];
|
sched->backends[b] = backends[b];
|
||||||
sched->bufts[b] = bufts ? bufts[b] : ggml_backend_get_default_buffer_type(backends[b]);
|
sched->bufts[b] = bufts ? bufts[b] : ggml_backend_get_default_buffer_type(backends[b]);
|
||||||
GGML_ASSERT(ggml_backend_supports_buft(backends[b], sched->bufts[b]));
|
GGML_ASSERT(ggml_backend_supports_buft(backends[b], sched->bufts[b]));
|
||||||
|
|
||||||
if (sched->n_copies > 1) {
|
if (sched->n_copies > 1) {
|
||||||
for (int c = 0; c < sched->n_copies; c++) {
|
for (int c = 0; c < sched->n_copies; c++) {
|
||||||
sched->events[b][c] = ggml_backend_event_new(backends[b]->device);
|
sched->events[b][c] = ggml_backend_event_new(backends[b]->device);
|
||||||
|
|
|
@ -4038,8 +4038,8 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_API void ggml_sycl_get_gpu_list(int *id_list, int max_len) try {
|
GGML_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len) try {
|
||||||
GGML_SYCL_DEBUG("[SYCL] call ggml_sycl_get_gpu_list\n");
|
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_gpu_list\n");
|
||||||
for(int i=0;i<max_len;i++) id_list[i] = -1;
|
for(int i=0;i<max_len;i++) id_list[i] = -1;
|
||||||
|
|
||||||
for (int i=0;i< ggml_sycl_info().device_count;i++){
|
for (int i=0;i< ggml_sycl_info().device_count;i++){
|
||||||
|
@ -4068,9 +4068,9 @@ catch (sycl::exception const &exc) {
|
||||||
std::exit(1);
|
std::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_API void ggml_sycl_get_device_description(int device, char *description,
|
GGML_API void ggml_backend_sycl_get_device_description(int device, char *description,
|
||||||
size_t description_size) try {
|
size_t description_size) try {
|
||||||
GGML_SYCL_DEBUG("[SYCL] call ggml_sycl_get_device_description\n");
|
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_description\n");
|
||||||
dpct::device_info prop;
|
dpct::device_info prop;
|
||||||
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
||||||
prop, dpct::dev_mgr::instance().get_device(device))));
|
prop, dpct::dev_mgr::instance().get_device(device))));
|
||||||
|
@ -4317,7 +4317,7 @@ catch (sycl::exception const &exc) {
|
||||||
std::exit(1);
|
std::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
|
static const ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
|
||||||
/* .get_name = */ ggml_backend_sycl_buffer_get_name,
|
/* .get_name = */ ggml_backend_sycl_buffer_get_name,
|
||||||
/* .free_buffer = */ ggml_backend_sycl_buffer_free_buffer,
|
/* .free_buffer = */ ggml_backend_sycl_buffer_free_buffer,
|
||||||
/* .get_base = */ ggml_backend_sycl_buffer_get_base,
|
/* .get_base = */ ggml_backend_sycl_buffer_get_base,
|
||||||
|
@ -4339,7 +4339,7 @@ struct ggml_backend_sycl_buffer_type_context {
|
||||||
queue_ptr stream = nullptr;
|
queue_ptr stream = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char * ggml_backend_sycl_buffer_type_name(ggml_backend_buffer_type_t buft) {
|
static const char * ggml_backend_sycl_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
|
||||||
ggml_backend_sycl_buffer_type_context * ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
|
ggml_backend_sycl_buffer_type_context * ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
|
||||||
|
|
||||||
return ctx->name.c_str();
|
return ctx->name.c_str();
|
||||||
|
@ -4395,7 +4395,7 @@ static size_t ggml_backend_sycl_buffer_type_get_alloc_size(ggml_backend_buffer_t
|
||||||
}
|
}
|
||||||
|
|
||||||
static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
|
static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
|
||||||
/* .get_name = */ ggml_backend_sycl_buffer_type_name,
|
/* .get_name = */ ggml_backend_sycl_buffer_type_get_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment,
|
||||||
/* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size,
|
/* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size,
|
||||||
|
@ -4409,22 +4409,24 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
|
||||||
|
|
||||||
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
|
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
|
||||||
|
|
||||||
if (device>=ggml_sycl_info().device_count or device<0) {
|
auto dev_count = ggml_backend_sycl_get_device_count();
|
||||||
|
|
||||||
|
if (device>=dev_count or device<0) {
|
||||||
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
|
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
|
||||||
device, ggml_sycl_info().device_count-1);
|
device, dev_count-1);
|
||||||
GGML_ASSERT(device<ggml_sycl_info().device_count);
|
GGML_ASSERT(device<dev_count);
|
||||||
}
|
}
|
||||||
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES];
|
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES];
|
||||||
|
|
||||||
static bool ggml_backend_sycl_buffer_type_initialized = false;
|
static bool ggml_backend_sycl_buffer_type_initialized = false;
|
||||||
|
|
||||||
if (!ggml_backend_sycl_buffer_type_initialized) {
|
if (!ggml_backend_sycl_buffer_type_initialized) {
|
||||||
for (int i = 0; i < ggml_sycl_info().device_count; i++) {
|
for (int i = 0; i < dev_count; i++) {
|
||||||
auto & device_i = dpct::dev_mgr::instance().get_device(i);
|
auto & device_i = dpct::dev_mgr::instance().get_device(i);
|
||||||
queue_ptr stream = &(device_i.default_queue());
|
queue_ptr stream = &(device_i.default_queue());
|
||||||
ggml_backend_sycl_buffer_types[i] = {
|
ggml_backend_sycl_buffer_types[i] = {
|
||||||
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
|
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
|
||||||
/* .device = */ nullptr,
|
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_sycl_reg(), i),
|
||||||
/* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(i), stream},
|
/* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(i), stream},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -4744,7 +4746,7 @@ static struct ggml_backend_buffer_i ggml_backend_sycl_split_buffer_interface = {
|
||||||
/* .reset = */ NULL,
|
/* .reset = */ NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char * ggml_backend_sycl_split_buffer_type_name(ggml_backend_buffer_type_t buft) {
|
static const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
|
||||||
return GGML_SYCL_NAME "_Split";
|
return GGML_SYCL_NAME "_Split";
|
||||||
|
|
||||||
UNUSED(buft);
|
UNUSED(buft);
|
||||||
|
@ -4799,7 +4801,7 @@ static bool ggml_backend_sycl_split_buffer_type_is_host(ggml_backend_buffer_type
|
||||||
}
|
}
|
||||||
|
|
||||||
static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface = {
|
static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface = {
|
||||||
/* .get_name = */ ggml_backend_sycl_split_buffer_type_name,
|
/* .get_name = */ ggml_backend_sycl_split_buffer_type_get_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_sycl_split_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_sycl_split_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment,
|
||||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||||
|
@ -4839,7 +4841,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * ten
|
||||||
|
|
||||||
struct ggml_backend_buffer_type buft {
|
struct ggml_backend_buffer_type buft {
|
||||||
/* .iface = */ ggml_backend_sycl_split_buffer_type_interface,
|
/* .iface = */ ggml_backend_sycl_split_buffer_type_interface,
|
||||||
/* .device = */ nullptr,
|
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_sycl_reg(), 0),
|
||||||
/* .context = */ new ggml_backend_sycl_split_buffer_type_context{tensor_split_arr},
|
/* .context = */ new ggml_backend_sycl_split_buffer_type_context{tensor_split_arr},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -4893,7 +4895,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
|
||||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||||
},
|
},
|
||||||
/* .device = */ nullptr,
|
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_sycl_reg(), 0),
|
||||||
/* .context = */ nullptr,
|
/* .context = */ nullptr,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -4902,7 +4904,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
|
||||||
|
|
||||||
// backend
|
// backend
|
||||||
|
|
||||||
static const char * ggml_backend_sycl_name(ggml_backend_t backend) {
|
static const char * ggml_backend_sycl_get_name(ggml_backend_t backend) {
|
||||||
|
|
||||||
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
||||||
|
|
||||||
|
@ -5023,7 +5025,7 @@ static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_
|
||||||
return GGML_STATUS_SUCCESS;
|
return GGML_STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
|
static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
|
||||||
switch (op->op) {
|
switch (op->op) {
|
||||||
case GGML_OP_CONV_TRANSPOSE_1D:
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
||||||
{
|
{
|
||||||
|
@ -5167,38 +5169,182 @@ static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_ten
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
UNUSED(backend);
|
UNUSED(dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ggml_backend_sycl_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
|
static bool ggml_backend_sycl_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
|
||||||
const int min_batch_size = 32;
|
const int min_batch_size = 32;
|
||||||
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS && op->op != GGML_OP_MUL_MAT_ID;
|
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS && op->op != GGML_OP_MUL_MAT_ID;
|
||||||
GGML_UNUSED(backend);
|
GGML_UNUSED(dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ggml_backend_sycl_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
|
static void ggml_backend_sycl_event_record(ggml_backend_t backend, ggml_backend_event_t event)
|
||||||
if (buft->iface.get_name != ggml_backend_sycl_buffer_type_name) {
|
try
|
||||||
|
{
|
||||||
|
if (event == nullptr || event->context == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_backend_sycl_context *sycl_ctx =
|
||||||
|
(ggml_backend_sycl_context *)backend->context;
|
||||||
|
sycl::event *sycl_event = static_cast<sycl::event *>(event->context);
|
||||||
|
|
||||||
|
const queue_ptr &stream = sycl_ctx->stream(sycl_ctx->device, 0);
|
||||||
|
// Record the current state of the queue
|
||||||
|
*sycl_event = stream->ext_oneapi_submit_barrier();
|
||||||
|
}
|
||||||
|
catch (sycl::exception const &exc)
|
||||||
|
{
|
||||||
|
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
||||||
|
<< ", line:" << __LINE__ << std::endl;
|
||||||
|
std::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_backend_sycl_event_wait(ggml_backend_t backend, ggml_backend_event_t event) try {
|
||||||
|
if (event == nullptr || event->context == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_backend_sycl_context* sycl_ctx = static_cast<ggml_backend_sycl_context*>(backend->context);
|
||||||
|
sycl::event* sycl_event = static_cast<sycl::event*>(event->context);
|
||||||
|
|
||||||
|
if (ggml_backend_is_sycl(backend)) {
|
||||||
|
SYCL_CHECK(CHECK_TRY_ERROR(sycl_event->wait()));
|
||||||
|
} else
|
||||||
|
GGML_ABORT("fatal error");
|
||||||
|
} catch (sycl::exception const& exc) {
|
||||||
|
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
||||||
|
<< ", line:" << __LINE__ << std::endl;
|
||||||
|
std::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ggml_backend_i ggml_backend_sycl_interface = {
|
||||||
|
/* .get_name = */ ggml_backend_sycl_get_name,
|
||||||
|
/* .free = */ ggml_backend_sycl_free,
|
||||||
|
/* .get_default_buffer_type = */ ggml_backend_sycl_get_default_buffer_type,
|
||||||
|
/* .set_tensor_async = */ ggml_backend_sycl_set_tensor_async,
|
||||||
|
/* .get_tensor_async = */ ggml_backend_sycl_get_tensor_async,
|
||||||
|
/* .cpy_tensor_async = */ NULL, // ggml_backend_sycl_cpy_tensor_async,
|
||||||
|
// // TODO: update for the new
|
||||||
|
// interface
|
||||||
|
/* .synchronize = */ ggml_backend_sycl_synchronize,
|
||||||
|
/* .graph_plan_create = */ NULL,
|
||||||
|
/* .graph_plan_free = */ NULL,
|
||||||
|
/* .graph_plan_update = */ NULL,
|
||||||
|
/* .graph_plan_compute = */ NULL,
|
||||||
|
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
|
||||||
|
/* .supports_op = */ NULL, // moved to device
|
||||||
|
/* .supports_buft = */ NULL, // moved to device
|
||||||
|
/* .offload_op = */ NULL, // moved to device
|
||||||
|
/* .event_record = */ ggml_backend_sycl_event_record,
|
||||||
|
/* .event_wait = */ ggml_backend_sycl_event_wait,
|
||||||
|
};
|
||||||
|
|
||||||
|
static ggml_guid_t ggml_backend_sycl_guid() {
|
||||||
|
static ggml_guid guid = { 0x58, 0x05, 0x13, 0x8f, 0xcd, 0x3a, 0x61, 0x9d, 0xe7, 0xcd, 0x98, 0xa9, 0x03, 0xfd, 0x7c, 0x53 };
|
||||||
|
return &guid;
|
||||||
|
}
|
||||||
|
|
||||||
|
// backend device
|
||||||
|
|
||||||
|
struct ggml_backend_sycl_device_context {
|
||||||
|
int device;
|
||||||
|
std::string name;
|
||||||
|
std::string description;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char * ggml_backend_sycl_device_get_name(ggml_backend_dev_t dev) {
|
||||||
|
ggml_backend_sycl_device_context * ctx = (ggml_backend_sycl_device_context *)dev->context;
|
||||||
|
return ctx->name.c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char * ggml_backend_sycl_device_get_description(ggml_backend_dev_t dev) {
|
||||||
|
ggml_backend_sycl_device_context * ctx = (ggml_backend_sycl_device_context *)dev->context;
|
||||||
|
return ctx->description.c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_backend_sycl_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
||||||
|
ggml_backend_sycl_device_context * ctx = (ggml_backend_sycl_device_context *)dev->context;
|
||||||
|
ggml_sycl_set_device(ctx->device);
|
||||||
|
SYCL_CHECK(CHECK_TRY_ERROR(
|
||||||
|
dpct::dev_mgr::instance().get_device(ctx->device).get_memory_info(*free, *total)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum ggml_backend_dev_type ggml_backend_sycl_device_get_type(ggml_backend_dev_t dev) {
|
||||||
|
GGML_UNUSED(dev);
|
||||||
|
return GGML_BACKEND_DEVICE_TYPE_GPU_FULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_backend_sycl_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
||||||
|
props->name = ggml_backend_sycl_device_get_name(dev);
|
||||||
|
props->description = ggml_backend_sycl_device_get_description(dev);
|
||||||
|
props->type = ggml_backend_sycl_device_get_type(dev);
|
||||||
|
ggml_backend_sycl_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
||||||
|
|
||||||
|
bool host_buffer = getenv("GGML_SYCL_NO_PINNED") == nullptr;
|
||||||
|
#ifdef GGML_SYCL_NO_PEER_COPY
|
||||||
|
bool events = false;
|
||||||
|
#else
|
||||||
|
bool events = true;
|
||||||
|
#endif
|
||||||
|
props->caps = {
|
||||||
|
/* async */ true,
|
||||||
|
/* host_buffer */ host_buffer,
|
||||||
|
/* .buffer_from_host_ptr = */ false,
|
||||||
|
/* events */ events,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static ggml_backend_t ggml_backend_sycl_device_init(ggml_backend_dev_t dev, const char * params) {
|
||||||
|
GGML_UNUSED(params);
|
||||||
|
ggml_backend_sycl_device_context * ctx = (ggml_backend_sycl_device_context *)dev->context;
|
||||||
|
return ggml_backend_sycl_init(ctx->device);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ggml_backend_buffer_type_t ggml_backend_sycl_device_get_buffer_type(ggml_backend_dev_t dev) {
|
||||||
|
ggml_backend_sycl_device_context * ctx = (ggml_backend_sycl_device_context *)dev->context;
|
||||||
|
return ggml_backend_sycl_buffer_type(ctx->device);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ggml_backend_buffer_type_t ggml_backend_sycl_device_get_host_buffer_type(ggml_backend_dev_t dev) {
|
||||||
|
GGML_UNUSED(dev);
|
||||||
|
return ggml_backend_sycl_host_buffer_type();
|
||||||
|
}
|
||||||
|
|
||||||
|
static ggml_backend_buffer_t ggml_backend_sycl_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
|
||||||
|
GGML_UNUSED(dev);
|
||||||
|
GGML_UNUSED(ptr);
|
||||||
|
GGML_UNUSED(size);
|
||||||
|
GGML_UNUSED(max_tensor_size);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool ggml_backend_sycl_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
||||||
|
if (buft->iface.get_name != ggml_backend_sycl_buffer_type_get_name) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
ggml_backend_sycl_buffer_type_context * buft_ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
|
ggml_backend_sycl_buffer_type_context * buft_ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
|
||||||
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
ggml_backend_sycl_device_context * sycl_ctx = (ggml_backend_sycl_device_context *)dev->context;
|
||||||
return buft_ctx->device == sycl_ctx->device;
|
return buft_ctx->device == sycl_ctx->device;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ggml_backend_event_t
|
static ggml_backend_event_t
|
||||||
ggml_backend_sycl_event_new(ggml_backend_t backend) {
|
ggml_backend_sycl_device_event_new(ggml_backend_dev_t dev) {
|
||||||
ggml_backend_sycl_context *sycl_ctx =
|
|
||||||
(ggml_backend_sycl_context *)backend->context;
|
|
||||||
|
|
||||||
|
#ifdef GGML_SYCL_NO_PEER_COPY
|
||||||
|
return nullptr;
|
||||||
|
#else
|
||||||
sycl::event *event_ptr = new sycl::event();
|
sycl::event *event_ptr = new sycl::event();
|
||||||
|
|
||||||
return new ggml_backend_event{
|
return new ggml_backend_event{
|
||||||
/* .backend = */ backend,
|
/* .device = */ dev,
|
||||||
/* .context = */ event_ptr,
|
/* .context = */ event_ptr,
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_backend_sycl_event_free(ggml_backend_event_t event) try {
|
static void ggml_backend_sycl_device_event_free(ggml_backend_dev_t dev, ggml_backend_event_t event) try {
|
||||||
|
GGML_UNUSED(dev);
|
||||||
if (event == nullptr) {
|
if (event == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -5216,25 +5362,10 @@ static void ggml_backend_sycl_event_free(ggml_backend_event_t event) try {
|
||||||
std::exit(1);
|
std::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_backend_sycl_event_record(ggml_backend_event_t event) try {
|
|
||||||
if (event == nullptr || event->context == nullptr) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
ggml_backend_sycl_context *sycl_ctx =
|
static void ggml_backend_sycl_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) {
|
||||||
(ggml_backend_sycl_context *)event->backend->context;
|
GGML_UNUSED(dev);
|
||||||
sycl::event *sycl_event = static_cast<sycl::event *>(event->context);
|
|
||||||
|
|
||||||
const queue_ptr &stream = sycl_ctx->stream(sycl_ctx->device, 0);
|
|
||||||
// Record the current state of the queue
|
|
||||||
*sycl_event = stream->ext_oneapi_submit_barrier();
|
|
||||||
} catch (sycl::exception const &exc) {
|
|
||||||
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
|
||||||
<< ", line:" << __LINE__ << std::endl;
|
|
||||||
std::exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void ggml_backend_sycl_event_synchronize(ggml_backend_event_t event) {
|
|
||||||
if (event == nullptr || event->context == nullptr) {
|
if (event == nullptr || event->context == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -5243,31 +5374,106 @@ static void ggml_backend_sycl_event_synchronize(ggml_backend_event_t event) {
|
||||||
SYCL_CHECK(CHECK_TRY_ERROR(sycl_event->wait()));
|
SYCL_CHECK(CHECK_TRY_ERROR(sycl_event->wait()));
|
||||||
}
|
}
|
||||||
|
|
||||||
static ggml_backend_i ggml_backend_sycl_interface = {
|
static const ggml_backend_device_i ggml_backend_sycl_device_interface = {
|
||||||
/* .get_name = */ ggml_backend_sycl_name,
|
/* .get_name = */ ggml_backend_sycl_device_get_name,
|
||||||
/* .free = */ ggml_backend_sycl_free,
|
/* .get_description = */ ggml_backend_sycl_device_get_description,
|
||||||
/* .get_default_buffer_type = */ ggml_backend_sycl_get_default_buffer_type,
|
/* .get_memory = */ ggml_backend_sycl_device_get_memory,
|
||||||
/* .set_tensor_async = */ ggml_backend_sycl_set_tensor_async,
|
/* .get_type = */ ggml_backend_sycl_device_get_type,
|
||||||
/* .get_tensor_async = */ ggml_backend_sycl_get_tensor_async,
|
/* .get_props = */ ggml_backend_sycl_device_get_props,
|
||||||
/* .cpy_tensor_async = */ NULL, // ggml_backend_sycl_cpy_tensor_async,
|
/* .init_backend = */ ggml_backend_sycl_device_init,
|
||||||
// // TODO: update for the new
|
/* .get_buffer_type = */ ggml_backend_sycl_device_get_buffer_type,
|
||||||
// interface
|
/* .get_host_buffer_type = */ ggml_backend_sycl_device_get_host_buffer_type,
|
||||||
/* .synchronize = */ ggml_backend_sycl_synchronize,
|
/* .buffer_from_host_ptr = */ ggml_backend_sycl_device_buffer_from_host_ptr,
|
||||||
/* .graph_plan_create = */ NULL,
|
/* .supports_op = */ ggml_backend_sycl_device_supports_op,
|
||||||
/* .graph_plan_free = */ NULL,
|
/* .supports_buft = */ ggml_backend_sycl_device_supports_buft,
|
||||||
/* .graph_plan_update = */ NULL,
|
/* .offload_op = */ ggml_backend_sycl_device_offload_op,
|
||||||
/* .graph_plan_compute = */ NULL,
|
/* .event_new = */ ggml_backend_sycl_device_event_new,
|
||||||
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
|
/* .event_free = */ ggml_backend_sycl_device_event_free,
|
||||||
/* .supports_op = */ ggml_backend_sycl_supports_op,
|
/* .event_synchronize = */ ggml_backend_sycl_device_event_synchronize,
|
||||||
/* .supports_buft = */ ggml_backend_sycl_supports_buft,
|
|
||||||
/* .offload_op = */ ggml_backend_sycl_offload_op,
|
|
||||||
/* .event_record = */ NULL,
|
|
||||||
/* .event_wait = */ NULL,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static ggml_guid_t ggml_backend_sycl_guid() {
|
// backend reg
|
||||||
static ggml_guid guid = { 0x58, 0x05, 0x13, 0x8f, 0xcd, 0x3a, 0x61, 0x9d, 0xe7, 0xcd, 0x98, 0xa9, 0x03, 0xfd, 0x7c, 0x53 };
|
struct ggml_backend_sycl_reg_context {
|
||||||
return &guid;
|
std::vector<ggml_backend_dev_t> devices;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char * ggml_backend_sycl_reg_get_name(ggml_backend_reg_t reg) {
|
||||||
|
GGML_UNUSED(reg);
|
||||||
|
return GGML_SYCL_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t ggml_backend_sycl_reg_get_device_count(ggml_backend_reg_t reg) {
|
||||||
|
ggml_backend_sycl_reg_context * ctx = (ggml_backend_sycl_reg_context *)reg->context;
|
||||||
|
return ctx->devices.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
static ggml_backend_dev_t ggml_backend_sycl_reg_get_device(ggml_backend_reg_t reg, size_t index) {
|
||||||
|
ggml_backend_sycl_reg_context * ctx = (ggml_backend_sycl_reg_context *)reg->context;
|
||||||
|
GGML_ASSERT(index < ctx->devices.size());
|
||||||
|
return ctx->devices[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char *name)
|
||||||
|
{
|
||||||
|
GGML_UNUSED(reg);
|
||||||
|
if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
|
||||||
|
return (void *)ggml_backend_sycl_split_buffer_type;
|
||||||
|
}
|
||||||
|
// SYCL doesn't support registering host memory, left here for reference
|
||||||
|
// "ggml_backend_register_host_buffer"
|
||||||
|
// "ggml_backend_unregister_host_buffer"
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const ggml_backend_reg_i ggml_backend_sycl_reg_interface = {
|
||||||
|
/* .get_name = */ ggml_backend_sycl_reg_get_name,
|
||||||
|
/* .get_device_count = */ ggml_backend_sycl_reg_get_device_count,
|
||||||
|
/* .get_device_get = */ ggml_backend_sycl_reg_get_device,
|
||||||
|
/* .get_proc_address = */ ggml_backend_sycl_reg_get_proc_address,
|
||||||
|
};
|
||||||
|
|
||||||
|
// backend registry
|
||||||
|
ggml_backend_reg_t ggml_backend_sycl_reg() {
|
||||||
|
static ggml_backend_reg reg;
|
||||||
|
static bool initialized = false;
|
||||||
|
|
||||||
|
{
|
||||||
|
static std::mutex mutex;
|
||||||
|
std::lock_guard<std::mutex> lock(mutex);
|
||||||
|
if (!initialized) {
|
||||||
|
ggml_backend_sycl_reg_context * ctx = new ggml_backend_sycl_reg_context;
|
||||||
|
|
||||||
|
for (int i = 0; i < ggml_sycl_info().device_count; i++) {
|
||||||
|
ggml_backend_sycl_device_context * dev_ctx = new ggml_backend_sycl_device_context;
|
||||||
|
dev_ctx->device = i;
|
||||||
|
dev_ctx->name = GGML_SYCL_NAME + std::to_string(i);
|
||||||
|
|
||||||
|
ggml_sycl_set_device(i);
|
||||||
|
|
||||||
|
dpct::device_info prop;
|
||||||
|
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
||||||
|
prop, dpct::dev_mgr::instance().get_device(i))));
|
||||||
|
|
||||||
|
dev_ctx->description = prop.get_name();
|
||||||
|
|
||||||
|
ggml_backend_dev_t dev = new ggml_backend_device {
|
||||||
|
/* .interface = */ ggml_backend_sycl_device_interface,
|
||||||
|
/* .reg = */ ®,
|
||||||
|
/* .context = */ dev_ctx
|
||||||
|
};
|
||||||
|
ctx->devices.push_back(dev);
|
||||||
|
}
|
||||||
|
|
||||||
|
reg = ggml_backend_reg {
|
||||||
|
/* .interface = */ ggml_backend_sycl_reg_interface,
|
||||||
|
/* .context = */ ctx
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ®
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_backend_t ggml_backend_sycl_init(int device) {
|
ggml_backend_t ggml_backend_sycl_init(int device) {
|
||||||
|
@ -5285,7 +5491,7 @@ ggml_backend_t ggml_backend_sycl_init(int device) {
|
||||||
ggml_backend_t sycl_backend = new ggml_backend {
|
ggml_backend_t sycl_backend = new ggml_backend {
|
||||||
/* .guid = */ ggml_backend_sycl_guid(),
|
/* .guid = */ ggml_backend_sycl_guid(),
|
||||||
/* .interface = */ ggml_backend_sycl_interface,
|
/* .interface = */ ggml_backend_sycl_interface,
|
||||||
/* .device = */ nullptr,
|
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_sycl_reg(), device),
|
||||||
/* .context = */ ctx
|
/* .context = */ ctx
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -3413,9 +3413,7 @@ static int llama_get_device_count(const llama_model & model) {
|
||||||
count += (int) model.rpc_servers.size();
|
count += (int) model.rpc_servers.size();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(GGML_USE_SYCL)
|
#if defined(GGML_USE_VULKAN)
|
||||||
count += ggml_backend_sycl_get_device_count();
|
|
||||||
#elif defined(GGML_USE_VULKAN)
|
|
||||||
count += ggml_backend_vk_get_device_count();
|
count += ggml_backend_vk_get_device_count();
|
||||||
#elif defined(GGML_USE_CANN)
|
#elif defined(GGML_USE_CANN)
|
||||||
count += ggml_backend_cann_get_device_count();
|
count += ggml_backend_cann_get_device_count();
|
||||||
|
@ -3438,11 +3436,7 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(const llama_mode
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(GGML_USE_SYCL)
|
#if defined(GGML_USE_CANN)
|
||||||
if (host_buffer) {
|
|
||||||
buft = ggml_backend_sycl_host_buffer_type();
|
|
||||||
}
|
|
||||||
#elif defined(GGML_USE_CANN)
|
|
||||||
if (host_buffer) {
|
if (host_buffer) {
|
||||||
buft = ggml_backend_cann_host_buffer_type();
|
buft = ggml_backend_cann_host_buffer_type();
|
||||||
}
|
}
|
||||||
|
@ -3481,8 +3475,6 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(const llama_
|
||||||
|
|
||||||
#if defined(GGML_USE_VULKAN)
|
#if defined(GGML_USE_VULKAN)
|
||||||
buft = ggml_backend_vk_buffer_type(device);
|
buft = ggml_backend_vk_buffer_type(device);
|
||||||
#elif defined(GGML_USE_SYCL)
|
|
||||||
buft = ggml_backend_sycl_buffer_type(device);
|
|
||||||
#elif defined(GGML_USE_KOMPUTE)
|
#elif defined(GGML_USE_KOMPUTE)
|
||||||
buft = ggml_backend_kompute_buffer_type(device);
|
buft = ggml_backend_kompute_buffer_type(device);
|
||||||
#elif defined(GGML_USE_CANN)
|
#elif defined(GGML_USE_CANN)
|
||||||
|
@ -3513,12 +3505,6 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_split(const llama_mo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef GGML_USE_SYCL
|
|
||||||
if (ggml_backend_sycl_get_device_count() > 1) {
|
|
||||||
buft = ggml_backend_sycl_split_buffer_type(tensor_split);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (buft == nullptr) {
|
if (buft == nullptr) {
|
||||||
buft = llama_default_buffer_type_offload(model, fallback_gpu);
|
buft = llama_default_buffer_type_offload(model, fallback_gpu);
|
||||||
}
|
}
|
||||||
|
@ -3548,12 +3534,7 @@ static size_t llama_get_device_memory(const llama_model & model, int device) {
|
||||||
return free;
|
return free;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(GGML_USE_SYCL)
|
#if defined(GGML_USE_VULKAN)
|
||||||
size_t total;
|
|
||||||
size_t free;
|
|
||||||
ggml_backend_sycl_get_device_memory(device, &free, &total);
|
|
||||||
return free;
|
|
||||||
#elif defined(GGML_USE_VULKAN)
|
|
||||||
size_t total;
|
size_t total;
|
||||||
size_t free;
|
size_t free;
|
||||||
ggml_backend_vk_get_device_memory(device, &free, &total);
|
ggml_backend_vk_get_device_memory(device, &free, &total);
|
||||||
|
@ -19021,10 +19002,11 @@ bool llama_supports_mlock(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool llama_supports_gpu_offload(void) {
|
bool llama_supports_gpu_offload(void) {
|
||||||
#if defined(GGML_USE_VULKAN) || \
|
#if defined(GGML_USE_VULKAN) || defined(GGML_USE_KOMPUTE) || \
|
||||||
defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC)
|
defined(GGML_USE_RPC)
|
||||||
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
|
// Defined when llama.cpp is compiled with support for offloading model layers
|
||||||
return true;
|
// to GPU.
|
||||||
|
return true;
|
||||||
#else
|
#else
|
||||||
return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
|
return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
|
||||||
ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU_FULL) != nullptr;
|
ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU_FULL) != nullptr;
|
||||||
|
@ -19349,28 +19331,6 @@ struct llama_context * llama_new_context_with_model(
|
||||||
ctx->backends.push_back(backend);
|
ctx->backends.push_back(backend);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif defined(GGML_USE_SYCL)
|
|
||||||
// with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
|
|
||||||
if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
|
|
||||||
ggml_backend_t backend = ggml_backend_sycl_init(main_gpu);
|
|
||||||
if (backend == nullptr) {
|
|
||||||
LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d backend\n", __func__, main_gpu);
|
|
||||||
llama_free(ctx);
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
ctx->backends.push_back(backend);
|
|
||||||
} else {
|
|
||||||
// LLAMA_SPLIT_LAYER requires a backend for each GPU
|
|
||||||
for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
|
|
||||||
ggml_backend_t backend = ggml_backend_sycl_init(i);
|
|
||||||
if (backend == nullptr) {
|
|
||||||
LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d for No.%d backend\n", __func__, i, i);
|
|
||||||
llama_free(ctx);
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
ctx->backends.push_back(backend);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#elif defined(GGML_USE_KOMPUTE)
|
#elif defined(GGML_USE_KOMPUTE)
|
||||||
if (model->n_gpu_layers > 0) {
|
if (model->n_gpu_layers > 0) {
|
||||||
auto * backend = ggml_backend_kompute_init(main_gpu);
|
auto * backend = ggml_backend_kompute_init(main_gpu);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue