Add remaining SYCL exception handler to kernel and refactor

Akarshan Biswas 2025-02-03 18:44:49 +05:30
parent 7369e54b33
commit e5926374a5
27 changed files with 216 additions and 103 deletions


@@ -52,7 +52,7 @@ static void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * d
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_I32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const int64_t ncols = dst->src[0]->ne[0];
     const int64_t nrows = ggml_nrows(dst->src[0]);


@@ -105,6 +105,7 @@ static void argsort_f32_i32_sycl(const float * x, int * dst, const int ncols, co
 inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_I32);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const int64_t ncols = dst->src[0]->ne[0];
     const int64_t nrows = ggml_nrows(dst->src[0]);
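Note: the `ggml_tensor * dst) try {` signatures used throughout these kernels are C++ function-try-blocks: `try` sits between the signature and the body, and the `catch` handlers follow the body's closing brace, which is what lets the later hunks in this commit append a handler after the final `}` without touching the body itself. A minimal, self-contained illustration of the syntax (example names only, not ggml code):

#include <iostream>
#include <stdexcept>

// function-try-block: `try` before the body, handlers after the closing brace
void do_work(int x) try {
    if (x < 0) {
        throw std::runtime_error("negative input");
    }
    std::cout << "ok: " << x << "\n";
} catch (const std::exception & exc) {
    std::cerr << exc.what() << " (caught by the function-try-block)\n";
}

int main() {
    do_work(1);
    do_work(-1);
}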


@@ -233,6 +233,8 @@ inline void ggml_sycl_op_bin_bcast(const ggml_tensor * src0, const ggml_tensor *
 }

 inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const void * src0_dd = static_cast<void *>(dst->src[0]->data);
     const void * src1_dd = static_cast<void *>(dst->src[1]->data);
     void * dst_dd = static_cast<void *>(dst->data);
@@ -247,6 +249,8 @@ inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
 }

 inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const void * src0_dd = static_cast<void *>(dst->src[0]->data);
     const void * src1_dd = static_cast<void *>(dst->src[1]->data);
     void * dst_dd = static_cast<void *>(dst->data);
@@ -261,6 +265,8 @@ inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
 }

 inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const void * src0_dd = static_cast<void *>(dst->src[0]->data);
     const void * src1_dd = static_cast<void *>(dst->src[1]->data);
     void * dst_dd = static_cast<void *>(dst->data);
@@ -275,6 +281,8 @@ inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
 }

 inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const void * src0_dd = static_cast<void *>(dst->src[0]->data);
     const void * src1_dd = static_cast<void *>(dst->src[1]->data);
     void * dst_dd = static_cast<void *>(dst->data);
@@ -289,6 +297,7 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
 }

 inline void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const void * src0_d = static_cast<void *>(dst->src[0]->data);
     void * dst_d = static_cast<void *>(dst->data);
     dpct::queue_ptr main_stream = ctx.stream();


@@ -23,7 +23,7 @@ static void clamp_f32_sycl(const float * x, float * dst, const float min, const
 inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     float min;
     float max;


@@ -52,6 +52,16 @@ bool gpu_has_xmx(sycl::device &dev) {
     return dev.has(sycl::aspect::ext_intel_matrix);
 }

+const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
+    return GGML_SYCL_NAME "_Split";
+
+    GGML_UNUSED(buft);
+}
+
+bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer) {
+    return buffer->buft->iface.get_name == ggml_backend_sycl_split_buffer_type_get_name;
+}
+
 int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size) {
     const int64_t max_range = std::numeric_limits<int>::max();
     int64_t sycl_down_blk_size = block_size;
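These two helpers centralize the split-buffer check that the kernels above previously did with strcmp on the buffer type name: a buffer is treated as the SYCL split type when its get_name callback is literally the split type's get_name function, i.e. a pointer-identity test instead of a string comparison. A stripped-down sketch of that idea with simplified stand-in types (not the real ggml-backend structs):

#include <cstdio>

// Simplified stand-ins for ggml_backend_buffer_type / ggml_backend_buffer.
struct buffer_type_iface { const char * (*get_name)(void *); };
struct buffer_type       { buffer_type_iface iface; };
struct buffer            { buffer_type * buft; };

static const char * split_get_name(void *) { return "SYCL_Split"; }
static const char * host_get_name(void *)  { return "SYCL_Host"; }

// identity comparison of the callback, as in ggml_backend_buffer_is_sycl_split()
static bool buffer_is_split(const buffer & b) {
    return b.buft->iface.get_name == split_get_name;
}

int main() {
    buffer_type split_t { { split_get_name } };
    buffer_type host_t  { { host_get_name } };
    buffer a { &split_t }, b { &host_t };
    std::printf("a split: %d, b split: %d\n", buffer_is_split(a), buffer_is_split(b));
}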


@@ -436,6 +436,8 @@ typedef void (*ggml_sycl_op_flatten_t)(ggml_backend_sycl_context & ctx, const gg
                                        const queue_ptr &main_stream);

 bool gpu_has_xmx(sycl::device &dev);
+const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_buffer_type_t buft);
+bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer);

 // Some backend specific macros
 #define GGML_SYCL_TENSOR_BINARY_OP_LOCALS \


@@ -159,34 +159,34 @@ static void concat_f32_sycl_non_cont(
 }

 static void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
-    const ggml_tensor *src0 = dst->src[0];
-    const ggml_tensor *src1 = dst->src[1];
-    queue_ptr stream = ctx.stream();
-    SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+    queue_ptr stream = ctx.stream();
+    SYCL_CHECK(ggml_sycl_set_device(ctx.device));

-    const int32_t dim = ((int32_t *)dst->op_params)[0];
+    const int32_t dim = ((int32_t *) dst->op_params)[0];

     if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
-        const float *src0_d = (const float *)src0->data;
-        const float *src1_d = (const float *)src1->data;
-        float *dst_d = (float *)dst->data;
+        const float * src0_d = (const float *) src0->data;
+        const float * src1_d = (const float *) src1->data;
+        float * dst_d = (float *) dst->data;

         if (dim != 3) {
             for (int i3 = 0; i3 < dst->ne[3]; i3++) {
-                concat_f32_sycl(
-                    src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
-                    dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1],
-                    src0->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], dim, stream);
+                concat_f32_sycl(src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
+                                dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1], src0->ne[2], dst->ne[0],
+                                dst->ne[1], dst->ne[2], dim, stream);
             }
         } else {
             const size_t size0 = ggml_nbytes(src0);
             const size_t size1 = ggml_nbytes(src1);

             SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
-            SYCL_CHECK(CHECK_TRY_ERROR(
-                stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
+            SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
         }
     } else
         concat_f32_sycl_non_cont(
             stream, (const char *)src0->data, (const char *)src1->data,


@@ -71,7 +71,9 @@ static void conv_transpose_1d_f32_f32_sycl(
         });
 }

-void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+static void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const ggml_tensor *src0 = dst->src[0];
     const ggml_tensor *src1 = dst->src[1];
     const float * src0_d = (const float *)src0->data;
@@ -97,4 +99,13 @@ void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor
         src0->ne[0], src0->ne[1], src0->ne[2],
         src1->ne[0], dst->ne[0],
         src0_d, src1_d, dst_d, stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }
+
+void ggml_sycl_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_SYCL_DEBUG("call %s\n", __func__);
+    ggml_sycl_op_conv_transpose_1d(ctx, dst);
+    GGML_SYCL_DEBUG("call %s done\n", __func__);
+}
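The shape introduced here for conv_transpose_1d is the same one applied to get_rows and gated_linear_attn further down: the worker becomes a file-local (static) function-try-block that reports any sycl::exception and exits, and a thin public entry point adds the GGML_SYCL_DEBUG trace around the call. A compilable sketch of that layering with placeholder names (std::exception stands in for sycl::exception so the example builds without a SYCL toolchain):

#include <cstdlib>
#include <exception>
#include <iostream>
#include <stdexcept>

// stand-in for a SYCL kernel submission that may throw
static void launch_kernel(bool fail) {
    if (fail) {
        throw std::runtime_error("device kernel submission failed");
    }
}

// file-local worker: a function-try-block, mirroring ggml_sycl_op_conv_transpose_1d
static void op_example(bool fail) try {
    launch_kernel(fail);
} catch (const std::exception & exc) {
    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
              << ", line:" << __LINE__ << std::endl;
    std::exit(1);
}

// public entry point: only tracing plus the call into the worker,
// mirroring ggml_sycl_conv_transpose_1d and its GGML_SYCL_DEBUG calls
void example(bool fail) {
    std::cerr << "call " << __func__ << "\n";
    op_example(fail);
    std::cerr << "call " << __func__ << " done\n";
}

int main() {
    example(false);   // normal path
    // example(true); // would print the diagnostic and exit(1)
}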


@@ -15,6 +15,6 @@

 #include "common.hpp"

-void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
+void ggml_sycl_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

 #endif // GGML_SYCL_CONV_HPP


@@ -339,13 +339,14 @@ static void ggml_cpy_i32_i32_sycl(const char * cx, char * cdst, const int ne, co
 }

 void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src1->buffer));
     const int64_t ne = ggml_nelements(src0);
     GGML_ASSERT(ne == ggml_nelements(src1));

     GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
     GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);

-    GGML_SYCL_TENSOR_BINARY_OP_CP_LOCALS;
+    GGML_SYCL_TENSOR_BINARY_OP_CP_LOCALS

     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     queue_ptr main_stream = ctx.stream();


@@ -29,7 +29,7 @@ static void diag_mask_inf_f32_sycl(const float * x, float * dst, const int ncols
 inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const int64_t ne00 = dst->src[0]->ne[0];
     const int64_t ne01 = dst->src[0]->ne[1];


@@ -508,87 +508,105 @@ void pad_f32_sycl(const float *x, float *dst, const int ne00,
         });
 }

-inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
@@ -596,115 +614,142 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor
     float * dst_dd = static_cast<float *>(dst->data);
     hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     log_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor *dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     step_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     float negative_slope;
     memcpy(&negative_slope, dst->op_params, sizeof(float));
@@ -715,26 +760,32 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), negative_slope, main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
     dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const float sf0 = (float)dst->ne[0]/dst->src[0]->ne[0];
     const float sf1 = (float)dst->ne[1]/dst->src[0]->ne[1];
@@ -749,14 +800,17 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor *
     upscale_f32_sycl(src0_dd, dst_dd, dst->src[0]->nb[0], dst->src[0]->nb[1], dst->src[0]->nb[2], dst->src[0]->nb[3],
                      dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
                      main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

-inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->src[0]->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
     float * dst_dd = static_cast<float *>(dst->data);
@@ -766,17 +820,20 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
     pad_f32_sycl(src0_dd, dst_dd,
                  dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2],
                  dst->ne[0], dst->ne[1], dst->ne[2], main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

 inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx,
-                             ggml_tensor *dst) {
+                             ggml_tensor *dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->ne[3] == 1); // just 3D tensors supported
-    GGML_ASSERT(strcmp(dst->src[1]->buffer->buft->iface.get_name(dst->src[1]->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const dpct::queue_ptr main_stream = ctx.stream();
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
@@ -790,6 +847,9 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx,
     int offset = dst->op_params[3] / 4; // offset in bytes
     acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), dst->src[1]->ne[0], dst->src[1]->ne[1], dst->src[1]->ne[2], nb1, nb2, offset, main_stream);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }

 void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {


@@ -126,15 +126,15 @@ template <typename src0_t> static void get_rows_sycl_float(ggml_backend_sycl_con
     }
 }

-void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     GGML_ASSERT(dst->src[0]->nb[0] == ggml_type_size(dst->src[0]->type));
     GGML_ASSERT(dst->src[1]->nb[0] == ggml_type_size(dst->src[1]->type));
     GGML_ASSERT(dst->nb[0] == ggml_type_size(dst->type));

-    GGML_ASSERT(strcmp(dst->src[1]->buffer->buft->iface.get_name(dst->src[1]->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));

     switch (dst->src[0]->type) {
         case GGML_TYPE_F16:
@@ -164,4 +164,13 @@ void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
             GGML_ABORT("fatal error");
             break;
     }
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
+}
+
+void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_SYCL_DEBUG("call %s\n", __func__);
+    ggml_sycl_op_get_rows(ctx, dst);
+    GGML_SYCL_DEBUG("call %s\n", __func__);
 }


@@ -3,6 +3,6 @@

 #include "common.hpp"

-void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

 #endif // GGML_SYCL_GETROWS_HPP


@@ -911,17 +911,6 @@ static struct ggml_backend_buffer_i ggml_backend_sycl_split_buffer_interface = {
 };

 // sycl split buffer type
-
-static const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
-    return GGML_SYCL_NAME "_Split";
-
-    GGML_UNUSED(buft);
-}
-
-static bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer) {
-    return buffer->buft->iface.get_name == ggml_backend_sycl_split_buffer_type_get_name;
-}
-
 static ggml_backend_buffer_t ggml_backend_sycl_split_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
     // since we don't know the exact split after rounding, we cannot allocate the device buffers at this point
     // instead, we allocate them for each tensor separately in init_tensor
@@ -2686,13 +2675,13 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
             ggml_sycl_argmax(ctx, dst);
             break;
         case GGML_OP_CONV_TRANSPOSE_1D:
-            ggml_sycl_op_conv_transpose_1d(ctx, dst);
+            ggml_sycl_conv_transpose_1d(ctx, dst);
             break;
         case GGML_OP_REPEAT:
             ggml_sycl_repeat(ctx, dst);
             break;
         case GGML_OP_GET_ROWS:
-            ggml_sycl_op_get_rows(ctx, dst);
+            ggml_sycl_get_rows(ctx, dst);
             break;
         case GGML_OP_DUP:
             ggml_sycl_dup(ctx, dst);
@@ -2854,7 +2843,7 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
             ggml_sycl_op_rwkv_wkv6(ctx, dst);
             break;
         case GGML_OP_GATED_LINEAR_ATTN:
-            ggml_sycl_op_gated_linear_attn(ctx, dst);
+            ggml_sycl_gated_linear_attn(ctx, dst);
             break;
         default:
             return false;


@@ -75,7 +75,7 @@ static void gated_linear_attn_f32_kernel(const dpct::queue_ptr stream, u_int B,
         });
 }

-void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+static void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     const float * k_d = static_cast<const float *>(dst->src[0]->data);
     const float * v_d = static_cast<const float *>(dst->src[1]->data);
     const float * r_d = static_cast<const float *>(dst->src[2]->data);
@@ -103,4 +103,13 @@ void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor
     } else {
         gated_linear_attn_f32_kernel<128>(stream, B, T, C, H, scale, k_d, v_d, r_d, td_d, s_d, dst_d);
     }
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
+}
+
+void ggml_sycl_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_SYCL_DEBUG("call %s\n", __func__);
+    ggml_sycl_op_gated_linear_attn(ctx, dst);
+    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }


@@ -3,6 +3,6 @@

 #include "common.hpp"

-void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

 #endif // GGML_SYCL_GLA_HPP


@@ -86,7 +86,7 @@ static void ggml_sycl_op_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * d
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F16);
     GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->src[1]->buffer->buft->iface.get_name(dst->src[1]->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     GGML_ASSERT(dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32);

     const int32_t s0 = ((const int32_t*)(dst->op_params))[0];


@@ -315,7 +315,7 @@ static void ggml_sycl_op_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const int64_t ne00 = dst->src[0]->ne[0];
     const int64_t nrows = ggml_nrows(dst->src[0]);
@@ -338,7 +338,7 @@ static void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor*
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     int num_groups = dst->op_params[0];
@@ -360,7 +360,7 @@ static void ggml_sycl_op_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor *
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const int64_t ne00 = dst->src[0]->ne[0];
     const int64_t nrows = ggml_nrows(dst->src[0]);


@@ -39,6 +39,7 @@ void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
     const oneapi::mkl::transpose src1_op =
         src1_T ? oneapi::mkl::transpose::nontrans : oneapi::mkl::transpose::trans;
     const int64_t ldb = (src1_T ? nb10 : nb11) / sizeof(float);
+    GGML_SYCL_DEBUG("call %s\n", __func__);

     try {
         // Perform matrix multiplication using oneMKL GEMM
@@ -55,4 +56,5 @@ void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
         std::cerr << exc.what() << std::endl;
         GGML_ASSERT(false);
     }
+    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }


@@ -71,7 +71,7 @@ static void pool2d_nchw_kernel(const int ih, const int iw, const int oh, const i
 static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const int32_t * opts = (const int32_t *) dst->op_params;
     enum ggml_op_pool op = static_cast<ggml_op_pool>(opts[0]);


@@ -197,8 +197,8 @@ static void ggml_sycl_op_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
     GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
     GGML_ASSERT(dst->src[0]->type == dst->type);
-    GGML_ASSERT(strcmp(dst->src[1]->buffer->buft->iface.get_name(dst->src[1]->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const int64_t ne00 = dst->src[0]->ne[0];
     const int64_t ne01 = dst->src[0]->ne[1];


@@ -21,7 +21,7 @@ static void scale_f32_sycl(const float * x, float * dst, const float scale, cons
 inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     float scale;
     memcpy(&scale, dst->op_params, sizeof(float));


@@ -228,6 +228,7 @@ static void ggml_sycl_op_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor *
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));

     GGML_ASSERT(!dst->src[1] || dst->src[1]->type == GGML_TYPE_F16 || dst->src[1]->type == GGML_TYPE_F32); // src1 contains mask and it is optional


@@ -27,7 +27,7 @@ static void sum_rows_f32_sycl(const float * x, float * dst, const int ncols, con
 inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const int64_t ne = ggml_nelements(dst->src[0]);
     dpct::queue_ptr main_stream = ctx.stream();
@@ -44,7 +44,7 @@ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
 inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
     const int64_t ncols = dst->src[0]->ne[0];
     const int64_t nrows = ggml_nrows(dst->src[0]);


@@ -55,7 +55,7 @@ static void timestep_embedding_f32_sycl(
         });
 }

-void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
     const ggml_tensor *src0 = dst->src[0];
     const float * src0_d = static_cast<const float *>(src0->data);
     float * dst_d = static_cast<float *>(dst->data);
@@ -66,6 +66,10 @@ void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tenso
     const int dim = dst->op_params[0];
     const int max_period = dst->op_params[1];

+    GGML_SYCL_DEBUG("call %s\n", __func__);
     timestep_embedding_f32_sycl(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream);
+    GGML_SYCL_DEBUG("call %s done\n", __func__);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }


@@ -95,7 +95,7 @@ static void rwkv_wkv_f32_kernel(
     }
 }

-void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
+void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) try {
     const float* k_d = (const float*)dst->src[0]->data;
     const float* v_d = (const float*)dst->src[1]->data;
@@ -121,6 +121,7 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
     const size_t shared_mem_size = WKV_BLOCK_SIZE * 4 * sizeof(float); // For k, r, tf, td
     sycl::range<3> block_dims(1, 1, C / H);
     sycl::range<3> grid_dims(1, 1, B * H);
+    GGML_SYCL_DEBUG("call %s", __func__);

     // Submit kernel
     stream->submit([&](sycl::handler& cgh) {
@@ -135,5 +136,9 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
             );
         });
     });
+    GGML_SYCL_DEBUG("call %s done", __func__);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
 }