diff --git a/ggml/src/ggml-sycl/backend.hpp b/ggml/src/ggml-sycl/backend.hpp index 92519caf8..6923214d5 100644 --- a/ggml/src/ggml-sycl/backend.hpp +++ b/ggml/src/ggml-sycl/backend.hpp @@ -35,6 +35,7 @@ #include "cpy.hpp" #include "getrows.hpp" #include "diagmask.hpp" +#include "scale.hpp" #include "gla.hpp" #endif // GGML_SYCL_BACKEND_HPP diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 7059c46d6..bf1c14971 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -1463,18 +1463,6 @@ static void k_sum_rows_f32(const float * x, float * dst, const int ncols, } } -static void scale_f32(const float * x, float * dst, const float scale, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); - - if (i >= k) { - return; - } - - dst[i] = scale * x[i]; -} - static void clamp_f32(const float * x, float * dst, const float min, const float max, const int k, const sycl::nd_item<3> &item_ct1) { const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + @@ -1612,18 +1600,6 @@ static void ggml_mul_mat_vec_nc_f16_f32_sycl( } } -static void scale_f32_sycl(const float *x, float *dst, const float scale, - const int k, queue_ptr stream) { - const int num_blocks = (k + SYCL_SCALE_BLOCK_SIZE - 1) / SYCL_SCALE_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_SCALE_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_SCALE_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - scale_f32(x, dst, scale, k, item_ct1); - }); -} - static void clamp_f32_sycl(const float *x, float *dst, const float min, const float max, const int k, queue_ptr stream) { @@ -1929,27 +1905,6 @@ inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * sum_rows_f32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream); } -inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); - - float scale; - memcpy(&scale, dst->op_params, sizeof(float)); - const float * src0_dd = static_cast(dst->src[0]->data); - float * dst_dd = static_cast(dst->data); - - dpct::queue_ptr main_stream = ctx.stream(); - - scale_f32_sycl(src0_dd, dst_dd, scale, ggml_nelements(dst->src[0]), main_stream); - /* - DPCT1010:87: SYCL uses exceptions to report errors and does not use the - error codes. The call was replaced with 0. You need to rewrite this code. - */ - SYCL_CHECK(0); -} - inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); @@ -2893,10 +2848,6 @@ catch (sycl::exception const &exc) { std::exit(1); } -static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - ggml_sycl_op_scale(ctx, dst); -} - static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { ggml_sycl_op_clamp(ctx, dst); } diff --git a/ggml/src/ggml-sycl/scale.cpp b/ggml/src/ggml-sycl/scale.cpp new file mode 100644 index 000000000..c21952697 --- /dev/null +++ b/ggml/src/ggml-sycl/scale.cpp @@ -0,0 +1,48 @@ +#include "scale.hpp" + +static void scale_f32(const float * x, float * dst, const float scale, const int k, const sycl::nd_item<3> & item_ct1) { + const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2); + + if (i >= k) { + return; + } + + dst[i] = scale * x[i]; +} + +static void scale_f32_sycl(const float * x, float * dst, const float scale, const int k, queue_ptr stream) { + const int num_blocks = (k + SYCL_SCALE_BLOCK_SIZE - 1) / SYCL_SCALE_BLOCK_SIZE; + stream->parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SCALE_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_SCALE_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { scale_f32(x, dst, scale, k, item_ct1); }); +} + +inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); + GGML_ASSERT(dst->type == GGML_TYPE_F32); + GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + + float scale; + memcpy(&scale, dst->op_params, sizeof(float)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); + + dpct::queue_ptr main_stream = ctx.stream(); + + scale_f32_sycl(src0_dd, dst_dd, scale, ggml_nelements(dst->src[0]), main_stream); + /* + DPCT1010:87: SYCL uses exceptions to report errors and does not use the + error codes. The call was replaced with 0. You need to rewrite this code. + */ + SYCL_CHECK(0); +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); +} + +void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_SYCL_DEBUG("call %s\n", __func__); + ggml_sycl_op_scale(ctx, dst); + GGML_SYCL_DEBUG("call %s done\n", __func__); +} diff --git a/ggml/src/ggml-sycl/scale.hpp b/ggml/src/ggml-sycl/scale.hpp new file mode 100644 index 000000000..d079a1e5b --- /dev/null +++ b/ggml/src/ggml-sycl/scale.hpp @@ -0,0 +1,8 @@ +#ifndef GGML_SYCL_SCALE_HPP +#define GGML_SYCL_SCALE_HPP + +#include "common.hpp" + +void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst); + +#endif // GGML_SYCL_SCALE_HPP