diff --git a/ggml/src/ggml-sycl/argmax.cpp b/ggml/src/ggml-sycl/argmax.cpp index 502f82840..b7d0471f8 100644 --- a/ggml/src/ggml-sycl/argmax.cpp +++ b/ggml/src/ggml-sycl/argmax.cpp @@ -52,7 +52,7 @@ static void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * d GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_I32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const int64_t ncols = dst->src[0]->ne[0]; const int64_t nrows = ggml_nrows(dst->src[0]); diff --git a/ggml/src/ggml-sycl/argsort.cpp b/ggml/src/ggml-sycl/argsort.cpp index 4557599db..282ee6cc6 100644 --- a/ggml/src/ggml-sycl/argsort.cpp +++ b/ggml/src/ggml-sycl/argsort.cpp @@ -105,6 +105,7 @@ static void argsort_f32_i32_sycl(const float * x, int * dst, const int ncols, co inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_I32); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const int64_t ncols = dst->src[0]->ne[0]; const int64_t nrows = ggml_nrows(dst->src[0]); diff --git a/ggml/src/ggml-sycl/binbcast.cpp b/ggml/src/ggml-sycl/binbcast.cpp index 0d3015024..9d9b1ab02 100644 --- a/ggml/src/ggml-sycl/binbcast.cpp +++ b/ggml/src/ggml-sycl/binbcast.cpp @@ -233,6 +233,8 @@ inline void ggml_sycl_op_bin_bcast(const ggml_tensor * src0, const ggml_tensor * } inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer)); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const void * src0_dd = static_cast(dst->src[0]->data); const void * src1_dd = static_cast(dst->src[1]->data); void * dst_dd = static_cast(dst->data); @@ -247,6 +249,8 @@ inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) } inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer)); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const void * src0_dd = static_cast(dst->src[0]->data); const void * src1_dd = static_cast(dst->src[1]->data); void * dst_dd = static_cast(dst->data); @@ -261,6 +265,8 @@ inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) } inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer)); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const void * src0_dd = static_cast(dst->src[0]->data); const void * src1_dd = static_cast(dst->src[1]->data); void * dst_dd = static_cast(dst->data); @@ -275,6 +281,8 @@ inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) } inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer)); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const void * src0_dd = static_cast(dst->src[0]->data); const void * src1_dd = static_cast(dst->src[1]->data); void * dst_dd = static_cast(dst->data); @@ -289,6 +297,7 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) } inline void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const void * src0_d = static_cast(dst->src[0]->data); void * dst_d = static_cast(dst->data); dpct::queue_ptr main_stream = ctx.stream(); diff --git a/ggml/src/ggml-sycl/clamp.cpp b/ggml/src/ggml-sycl/clamp.cpp index 8ffee729c..35eb8deca 100644 --- a/ggml/src/ggml-sycl/clamp.cpp +++ b/ggml/src/ggml-sycl/clamp.cpp @@ -23,7 +23,7 @@ static void clamp_f32_sycl(const float * x, float * dst, const float min, const inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); float min; float max; diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml/src/ggml-sycl/common.cpp index 3cdc76223..e425f5ec6 100644 --- a/ggml/src/ggml-sycl/common.cpp +++ b/ggml/src/ggml-sycl/common.cpp @@ -52,6 +52,16 @@ bool gpu_has_xmx(sycl::device &dev) { return dev.has(sycl::aspect::ext_intel_matrix); } +const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_buffer_type_t buft) { + return GGML_SYCL_NAME "_Split"; + + GGML_UNUSED(buft); +} + +bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer) { + return buffer->buft->iface.get_name == ggml_backend_sycl_split_buffer_type_get_name; +} + int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size) { const int64_t max_range = std::numeric_limits::max(); int64_t sycl_down_blk_size = block_size; diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index 0eb291ecc..faf25dd9c 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -436,6 +436,8 @@ typedef void (*ggml_sycl_op_flatten_t)(ggml_backend_sycl_context & ctx, const gg const queue_ptr &main_stream); bool gpu_has_xmx(sycl::device &dev); +const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_buffer_type_t buft); +bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer); // Some backend specific macros #define GGML_SYCL_TENSOR_BINARY_OP_LOCALS \ diff --git a/ggml/src/ggml-sycl/concat.cpp b/ggml/src/ggml-sycl/concat.cpp index fa44cd7b6..c69e109d8 100644 --- a/ggml/src/ggml-sycl/concat.cpp +++ b/ggml/src/ggml-sycl/concat.cpp @@ -159,34 +159,34 @@ static void concat_f32_sycl_non_cont( } static void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { - const ggml_tensor *src0 = dst->src[0]; - const ggml_tensor *src1 = dst->src[1]; - queue_ptr stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer)); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + queue_ptr stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - const int32_t dim = ((int32_t *)dst->op_params)[0]; + const int32_t dim = ((int32_t *) dst->op_params)[0]; - if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) { - const float *src0_d = (const float *)src0->data; - const float *src1_d = (const float *)src1->data; + if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) { + const float * src0_d = (const float *) src0->data; + const float * src1_d = (const float *) src1->data; - float *dst_d = (float *)dst->data; + float * dst_d = (float *) dst->data; - if (dim != 3) { - for (int i3 = 0; i3 < dst->ne[3]; i3++) { - concat_f32_sycl( - src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4), - dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1], - src0->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], dim, stream); - } - } else { - const size_t size0 = ggml_nbytes(src0); - const size_t size1 = ggml_nbytes(src1); + if (dim != 3) { + for (int i3 = 0; i3 < dst->ne[3]; i3++) { + concat_f32_sycl(src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4), + dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1], src0->ne[2], dst->ne[0], + dst->ne[1], dst->ne[2], dim, stream); + } + } else { + const size_t size0 = ggml_nbytes(src0); + const size_t size1 = ggml_nbytes(src1); - SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait())); - SYCL_CHECK(CHECK_TRY_ERROR( - stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait())); - } + SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait())); + SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait())); + } } else concat_f32_sycl_non_cont( stream, (const char *)src0->data, (const char *)src1->data, diff --git a/ggml/src/ggml-sycl/conv.cpp b/ggml/src/ggml-sycl/conv.cpp index ef3108591..578656a8b 100644 --- a/ggml/src/ggml-sycl/conv.cpp +++ b/ggml/src/ggml-sycl/conv.cpp @@ -71,7 +71,9 @@ static void conv_transpose_1d_f32_f32_sycl( }); } -void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { +static void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer)); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const ggml_tensor *src0 = dst->src[0]; const ggml_tensor *src1 = dst->src[1]; const float * src0_d = (const float *)src0->data; @@ -97,4 +99,13 @@ void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor src0->ne[0], src0->ne[1], src0->ne[2], src1->ne[0], dst->ne[0], src0_d, src1_d, dst_d, stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } + +void ggml_sycl_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_SYCL_DEBUG("call %s\n", __func__); + ggml_sycl_op_conv_transpose_1d(ctx, dst); + GGML_SYCL_DEBUG("call %s done\n", __func__); +} \ No newline at end of file diff --git a/ggml/src/ggml-sycl/conv.hpp b/ggml/src/ggml-sycl/conv.hpp index f9e60dc75..d5d69b3f8 100644 --- a/ggml/src/ggml-sycl/conv.hpp +++ b/ggml/src/ggml-sycl/conv.hpp @@ -15,6 +15,6 @@ #include "common.hpp" -void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst); +void ggml_sycl_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst); #endif // GGML_SYCL_CONV_HPP diff --git a/ggml/src/ggml-sycl/cpy.cpp b/ggml/src/ggml-sycl/cpy.cpp index e6267dbf7..1559db5dc 100644 --- a/ggml/src/ggml-sycl/cpy.cpp +++ b/ggml/src/ggml-sycl/cpy.cpp @@ -339,13 +339,14 @@ static void ggml_cpy_i32_i32_sycl(const char * cx, char * cdst, const int ne, co } void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try { + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src1->buffer)); const int64_t ne = ggml_nelements(src0); GGML_ASSERT(ne == ggml_nelements(src1)); GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX); GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX); - GGML_SYCL_TENSOR_BINARY_OP_CP_LOCALS; + GGML_SYCL_TENSOR_BINARY_OP_CP_LOCALS SYCL_CHECK(ggml_sycl_set_device(ctx.device)); queue_ptr main_stream = ctx.stream(); diff --git a/ggml/src/ggml-sycl/diagmask.cpp b/ggml/src/ggml-sycl/diagmask.cpp index 8d61463f2..1ec00194d 100644 --- a/ggml/src/ggml-sycl/diagmask.cpp +++ b/ggml/src/ggml-sycl/diagmask.cpp @@ -29,7 +29,7 @@ static void diag_mask_inf_f32_sycl(const float * x, float * dst, const int ncols inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const int64_t ne00 = dst->src[0]->ne[0]; const int64_t ne01 = dst->src[0]->ne[1]; diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index 12890cd83..f1d994844 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -508,87 +508,105 @@ void pad_f32_sycl(const float *x, float *dst, const int ne00, }); } -inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { +inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { +inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { +inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { +inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); @@ -596,115 +614,142 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor float * dst_dd = static_cast(dst->data); hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { +inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); log_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { +inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { +inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor *dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); step_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { +inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); float negative_slope; memcpy(&negative_slope, dst->op_params, sizeof(float)); @@ -715,26 +760,32 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor SYCL_CHECK(ggml_sycl_set_device(ctx.device)); leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), negative_slope, main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const float sf0 = (float)dst->ne[0]/dst->src[0]->ne[0]; const float sf1 = (float)dst->ne[1]/dst->src[0]->ne[1]; @@ -749,14 +800,17 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * upscale_f32_sycl(src0_dd, dst_dd, dst->src[0]->nb[0], dst->src[0]->nb[1], dst->src[0]->nb[2], dst->src[0]->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } -inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); GGML_ASSERT(dst->src[0]->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const float * src0_dd = static_cast(dst->src[0]->data); float * dst_dd = static_cast(dst->data); @@ -766,17 +820,20 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) pad_f32_sycl(src0_dd, dst_dd, dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, - ggml_tensor *dst) { + ggml_tensor *dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); GGML_ASSERT(dst->ne[3] == 1); // just 3D tensors supported - GGML_ASSERT(strcmp(dst->src[1]->buffer->buft->iface.get_name(dst->src[1]->buffer->buft), GGML_SYCL_NAME "_Split") != 0); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer)); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); @@ -790,6 +847,9 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, int offset = dst->op_params[3] / 4; // offset in bytes acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), dst->src[1]->ne[0], dst->src[1]->ne[1], dst->src[1]->ne[2], nb1, nb2, offset, main_stream); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { diff --git a/ggml/src/ggml-sycl/getrows.cpp b/ggml/src/ggml-sycl/getrows.cpp index 1833c80a8..00fe37d8e 100644 --- a/ggml/src/ggml-sycl/getrows.cpp +++ b/ggml/src/ggml-sycl/getrows.cpp @@ -126,15 +126,15 @@ template static void get_rows_sycl_float(ggml_backend_sycl_con } } -void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32); GGML_ASSERT(dst->type == GGML_TYPE_F32); GGML_ASSERT(dst->src[0]->nb[0] == ggml_type_size(dst->src[0]->type)); GGML_ASSERT(dst->src[1]->nb[0] == ggml_type_size(dst->src[1]->type)); GGML_ASSERT(dst->nb[0] == ggml_type_size(dst->type)); - GGML_ASSERT(strcmp(dst->src[1]->buffer->buft->iface.get_name(dst->src[1]->buffer->buft), GGML_SYCL_NAME "_Split") != 0); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer)); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); switch (dst->src[0]->type) { case GGML_TYPE_F16: @@ -164,4 +164,13 @@ void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_ABORT("fatal error"); break; } +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); +} + +void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_SYCL_DEBUG("call %s\n", __func__); + ggml_sycl_op_get_rows(ctx, dst); + GGML_SYCL_DEBUG("call %s\n", __func__); } diff --git a/ggml/src/ggml-sycl/getrows.hpp b/ggml/src/ggml-sycl/getrows.hpp index 7060b04d4..7dc21b992 100644 --- a/ggml/src/ggml-sycl/getrows.hpp +++ b/ggml/src/ggml-sycl/getrows.hpp @@ -3,6 +3,6 @@ #include "common.hpp" -void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst); #endif // GGML_SYCL_GETROWS_HPP diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index ea8a98e87..88733dfa0 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -911,17 +911,6 @@ static struct ggml_backend_buffer_i ggml_backend_sycl_split_buffer_interface = { }; // sycl split buffer type - -static const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_buffer_type_t buft) { - return GGML_SYCL_NAME "_Split"; - - GGML_UNUSED(buft); -} - -static bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer) { - return buffer->buft->iface.get_name == ggml_backend_sycl_split_buffer_type_get_name; -} - static ggml_backend_buffer_t ggml_backend_sycl_split_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { // since we don't know the exact split after rounding, we cannot allocate the device buffers at this point // instead, we allocate them for each tensor separately in init_tensor @@ -2686,13 +2675,13 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens ggml_sycl_argmax(ctx, dst); break; case GGML_OP_CONV_TRANSPOSE_1D: - ggml_sycl_op_conv_transpose_1d(ctx, dst); + ggml_sycl_conv_transpose_1d(ctx, dst); break; case GGML_OP_REPEAT: ggml_sycl_repeat(ctx, dst); break; case GGML_OP_GET_ROWS: - ggml_sycl_op_get_rows(ctx, dst); + ggml_sycl_get_rows(ctx, dst); break; case GGML_OP_DUP: ggml_sycl_dup(ctx, dst); @@ -2854,7 +2843,7 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens ggml_sycl_op_rwkv_wkv6(ctx, dst); break; case GGML_OP_GATED_LINEAR_ATTN: - ggml_sycl_op_gated_linear_attn(ctx, dst); + ggml_sycl_gated_linear_attn(ctx, dst); break; default: return false; diff --git a/ggml/src/ggml-sycl/gla.cpp b/ggml/src/ggml-sycl/gla.cpp index 630db5ba6..dd01ec3f8 100644 --- a/ggml/src/ggml-sycl/gla.cpp +++ b/ggml/src/ggml-sycl/gla.cpp @@ -75,7 +75,7 @@ static void gated_linear_attn_f32_kernel(const dpct::queue_ptr stream, u_int B, }); } -void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +static void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { const float * k_d = static_cast(dst->src[0]->data); const float * v_d = static_cast(dst->src[1]->data); const float * r_d = static_cast(dst->src[2]->data); @@ -103,4 +103,13 @@ void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor } else { gated_linear_attn_f32_kernel<128>(stream, B, T, C, H, scale, k_d, v_d, r_d, td_d, s_d, dst_d); } +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); +} + +void ggml_sycl_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_SYCL_DEBUG("call %s\n", __func__); + ggml_sycl_op_gated_linear_attn(ctx, dst); + GGML_SYCL_DEBUG("call %s done\n", __func__); } diff --git a/ggml/src/ggml-sycl/gla.hpp b/ggml/src/ggml-sycl/gla.hpp index 607cf3a7f..a7e3af79d 100644 --- a/ggml/src/ggml-sycl/gla.hpp +++ b/ggml/src/ggml-sycl/gla.hpp @@ -3,6 +3,6 @@ #include "common.hpp" -void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst); #endif // GGML_SYCL_GLA_HPP diff --git a/ggml/src/ggml-sycl/im2col.cpp b/ggml/src/ggml-sycl/im2col.cpp index 834aec5a8..671229070 100644 --- a/ggml/src/ggml-sycl/im2col.cpp +++ b/ggml/src/ggml-sycl/im2col.cpp @@ -86,7 +86,7 @@ static void ggml_sycl_op_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * d GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F16); GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->src[1]->buffer->buft->iface.get_name(dst->src[1]->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); GGML_ASSERT(dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32); const int32_t s0 = ((const int32_t*)(dst->op_params))[0]; diff --git a/ggml/src/ggml-sycl/norm.cpp b/ggml/src/ggml-sycl/norm.cpp index 54e9ca558..1cf6e01f5 100644 --- a/ggml/src/ggml-sycl/norm.cpp +++ b/ggml/src/ggml-sycl/norm.cpp @@ -315,7 +315,7 @@ static void ggml_sycl_op_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const int64_t ne00 = dst->src[0]->ne[0]; const int64_t nrows = ggml_nrows(dst->src[0]); @@ -338,7 +338,7 @@ static void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor* GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); int num_groups = dst->op_params[0]; @@ -360,7 +360,7 @@ static void ggml_sycl_op_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const int64_t ne00 = dst->src[0]->ne[0]; const int64_t nrows = ggml_nrows(dst->src[0]); diff --git a/ggml/src/ggml-sycl/outprod.cpp b/ggml/src/ggml-sycl/outprod.cpp index 27c3adca4..d21196a21 100644 --- a/ggml/src/ggml-sycl/outprod.cpp +++ b/ggml/src/ggml-sycl/outprod.cpp @@ -39,6 +39,7 @@ void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { const oneapi::mkl::transpose src1_op = src1_T ? oneapi::mkl::transpose::nontrans : oneapi::mkl::transpose::trans; const int64_t ldb = (src1_T ? nb10 : nb11) / sizeof(float); + GGML_SYCL_DEBUG("call %s\n", __func__); try { // Perform matrix multiplication using oneMKL GEMM @@ -55,4 +56,5 @@ void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { std::cerr << exc.what() << std::endl; GGML_ASSERT(false); } + GGML_SYCL_DEBUG("call %s done\n", __func__); } diff --git a/ggml/src/ggml-sycl/pool2d.cpp b/ggml/src/ggml-sycl/pool2d.cpp index ee67307ca..b010e7d3a 100644 --- a/ggml/src/ggml-sycl/pool2d.cpp +++ b/ggml/src/ggml-sycl/pool2d.cpp @@ -71,7 +71,7 @@ static void pool2d_nchw_kernel(const int ih, const int iw, const int oh, const i static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const int32_t * opts = (const int32_t *) dst->op_params; enum ggml_op_pool op = static_cast(opts[0]); diff --git a/ggml/src/ggml-sycl/rope.cpp b/ggml/src/ggml-sycl/rope.cpp index b7f03222f..78fe1d001 100644 --- a/ggml/src/ggml-sycl/rope.cpp +++ b/ggml/src/ggml-sycl/rope.cpp @@ -197,8 +197,8 @@ static void ggml_sycl_op_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); GGML_ASSERT(dst->src[0]->type == dst->type); - GGML_ASSERT(strcmp(dst->src[1]->buffer->buft->iface.get_name(dst->src[1]->buffer->buft), GGML_SYCL_NAME "_Split") != 0); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer)); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const int64_t ne00 = dst->src[0]->ne[0]; const int64_t ne01 = dst->src[0]->ne[1]; diff --git a/ggml/src/ggml-sycl/scale.cpp b/ggml/src/ggml-sycl/scale.cpp index f37f852cf..74c117836 100644 --- a/ggml/src/ggml-sycl/scale.cpp +++ b/ggml/src/ggml-sycl/scale.cpp @@ -21,7 +21,7 @@ static void scale_f32_sycl(const float * x, float * dst, const float scale, cons inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); float scale; memcpy(&scale, dst->op_params, sizeof(float)); diff --git a/ggml/src/ggml-sycl/softmax.cpp b/ggml/src/ggml-sycl/softmax.cpp index 018fed5a9..b2b73dc68 100644 --- a/ggml/src/ggml-sycl/softmax.cpp +++ b/ggml/src/ggml-sycl/softmax.cpp @@ -228,6 +228,7 @@ static void ggml_sycl_op_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor * GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); GGML_ASSERT(!dst->src[1] || dst->src[1]->type == GGML_TYPE_F16 || dst->src[1]->type == GGML_TYPE_F32); // src1 contains mask and it is optional diff --git a/ggml/src/ggml-sycl/sum.cpp b/ggml/src/ggml-sycl/sum.cpp index 66d3c8f6d..acec1493a 100644 --- a/ggml/src/ggml-sycl/sum.cpp +++ b/ggml/src/ggml-sycl/sum.cpp @@ -27,7 +27,7 @@ static void sum_rows_f32_sycl(const float * x, float * dst, const int ncols, con inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const int64_t ne = ggml_nelements(dst->src[0]); dpct::queue_ptr main_stream = ctx.stream(); @@ -44,7 +44,7 @@ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); const int64_t ncols = dst->src[0]->ne[0]; const int64_t nrows = ggml_nrows(dst->src[0]); diff --git a/ggml/src/ggml-sycl/tsembd.cpp b/ggml/src/ggml-sycl/tsembd.cpp index 9de324c3a..a5aedd4d0 100644 --- a/ggml/src/ggml-sycl/tsembd.cpp +++ b/ggml/src/ggml-sycl/tsembd.cpp @@ -55,7 +55,7 @@ static void timestep_embedding_f32_sycl( }); } -void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { const ggml_tensor *src0 = dst->src[0]; const float * src0_d = static_cast(src0->data); float * dst_d = static_cast(dst->data); @@ -66,6 +66,10 @@ void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tenso const int dim = dst->op_params[0]; const int max_period = dst->op_params[1]; - + GGML_SYCL_DEBUG("call %s\n", __func__); timestep_embedding_f32_sycl(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream); + GGML_SYCL_DEBUG("call %s done\n", __func__); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } diff --git a/ggml/src/ggml-sycl/wkv6.cpp b/ggml/src/ggml-sycl/wkv6.cpp index 9c20135fd..4c01f29b9 100644 --- a/ggml/src/ggml-sycl/wkv6.cpp +++ b/ggml/src/ggml-sycl/wkv6.cpp @@ -95,7 +95,7 @@ static void rwkv_wkv_f32_kernel( } } -void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { +void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) try { const float* k_d = (const float*)dst->src[0]->data; const float* v_d = (const float*)dst->src[1]->data; @@ -121,6 +121,7 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { const size_t shared_mem_size = WKV_BLOCK_SIZE * 4 * sizeof(float); // For k, r, tf, td sycl::range<3> block_dims(1, 1, C / H); sycl::range<3> grid_dims(1, 1, B * H); + GGML_SYCL_DEBUG("call %s", __func__); // Submit kernel stream->submit([&](sycl::handler& cgh) { @@ -135,5 +136,9 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { ); }); }); + GGML_SYCL_DEBUG("call %s done", __func__); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); }