From ddc5e428f26a97f41f5b426d232be57d74277657 Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Sun, 2 Feb 2025 12:08:22 +0530 Subject: [PATCH] clamp: move to a separate file --- ggml/src/ggml-sycl/backend.hpp | 1 + ggml/src/ggml-sycl/clamp.cpp | 51 ++++++++++++++++++++++++++++++++ ggml/src/ggml-sycl/clamp.hpp | 8 +++++ ggml/src/ggml-sycl/ggml-sycl.cpp | 51 -------------------------------- 4 files changed, 60 insertions(+), 51 deletions(-) create mode 100644 ggml/src/ggml-sycl/clamp.cpp create mode 100644 ggml/src/ggml-sycl/clamp.hpp diff --git a/ggml/src/ggml-sycl/backend.hpp b/ggml/src/ggml-sycl/backend.hpp index 6923214d5..efe88fb20 100644 --- a/ggml/src/ggml-sycl/backend.hpp +++ b/ggml/src/ggml-sycl/backend.hpp @@ -36,6 +36,7 @@ #include "getrows.hpp" #include "diagmask.hpp" #include "scale.hpp" +#include "clamp.hpp" #include "gla.hpp" #endif // GGML_SYCL_BACKEND_HPP diff --git a/ggml/src/ggml-sycl/clamp.cpp b/ggml/src/ggml-sycl/clamp.cpp new file mode 100644 index 000000000..f1c20d3ca --- /dev/null +++ b/ggml/src/ggml-sycl/clamp.cpp @@ -0,0 +1,51 @@ +#include "clamp.hpp" + +static void clamp_f32(const float * x, float * dst, const float min, const float max, const int k, + const sycl::nd_item<3> & item_ct1) { + const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2); + + if (i >= k) { + return; + } + + dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]); +} + +static void clamp_f32_sycl(const float * x, float * dst, const float min, const float max, const int k, + queue_ptr stream) { + const int num_blocks = (k + SYCL_CLAMP_BLOCK_SIZE - 1) / SYCL_CLAMP_BLOCK_SIZE; + stream->parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CLAMP_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_CLAMP_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { clamp_f32(x, dst, min, max, k, item_ct1); }); +} + +inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try { + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); + GGML_ASSERT(dst->type == GGML_TYPE_F32); + GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0); + + float min; + float max; + memcpy(&min, dst->op_params, sizeof(float)); + memcpy(&max, (float *) dst->op_params + 1, sizeof(float)); + const dpct::queue_ptr main_stream = ctx.stream(); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); + + clamp_f32_sycl(src0_dd, dst_dd, min, max, ggml_nelements(dst->src[0]), main_stream); + /* + DPCT1010:88: SYCL uses exceptions to report errors and does not use the + error codes. The call was replaced with 0. You need to rewrite this code. + SYCL_CHECK(0); + */ +} catch (const sycl::exception & exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); +} + +void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_SYCL_DEBUG("call %s\n", __func__); + ggml_sycl_op_clamp(ctx, dst); + GGML_SYCL_DEBUG("call %s done\n", __func__); +} diff --git a/ggml/src/ggml-sycl/clamp.hpp b/ggml/src/ggml-sycl/clamp.hpp new file mode 100644 index 000000000..fdfbff55b --- /dev/null +++ b/ggml/src/ggml-sycl/clamp.hpp @@ -0,0 +1,8 @@ +#ifndef GGML_SYCL_CLAMP_HPP +#define GGML_SYCL_CLAMP_HPP + +#include "common.hpp" + +void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst); + +#endif // GGML_SYCL_CLAMP_HPP diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index e6a4531f7..3773187fc 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -1463,18 +1463,6 @@ static void k_sum_rows_f32(const float * x, float * dst, const int ncols, } } -static void clamp_f32(const float * x, float * dst, const float min, const float max, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); - - if (i >= k) { - return; - } - - dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]); -} - template static void pool2d_nchw_kernel( const int ih, const int iw, const int oh, const int ow, @@ -1600,19 +1588,6 @@ static void ggml_mul_mat_vec_nc_f16_f32_sycl( } } -static void clamp_f32_sycl(const float *x, float *dst, const float min, - const float max, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_CLAMP_BLOCK_SIZE - 1) / SYCL_CLAMP_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_CLAMP_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_CLAMP_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - clamp_f32(x, dst, min, max, k, item_ct1); - }); -} - static void sum_rows_f32_sycl(const float *x, float *dst, const int ncols, const int nrows, queue_ptr stream) { const sycl::range<3> block_dims(1, 1, WARP_SIZE); @@ -1905,28 +1880,6 @@ inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * sum_rows_f32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream); } -inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer)); - - float min; - float max; - memcpy(&min, dst->op_params, sizeof(float)); - memcpy(&max, (float *) dst->op_params + 1, sizeof(float)); - const dpct::queue_ptr main_stream = ctx.stream(); - const float * src0_dd = static_cast(dst->src[0]->data); - float * dst_dd = static_cast(dst->data); - - clamp_f32_sycl(src0_dd, dst_dd, min, max, ggml_nelements(dst->src[0]), main_stream); - /* - DPCT1010:88: SYCL uses exceptions to report errors and does not use the - error codes. The call was replaced with 0. You need to rewrite this code. - */ - SYCL_CHECK(0); -} - static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) { static bool peer_access_enabled = false; @@ -2848,10 +2801,6 @@ catch (sycl::exception const &exc) { std::exit(1); } -static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - ggml_sycl_op_clamp(ctx, dst); -} - static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { ggml_sycl_op_pool2d(ctx, dst); }