diff --git a/ggml/src/ggml-sycl/concat.cpp b/ggml/src/ggml-sycl/concat.cpp index a240968ad..d41cfd3a6 100644 --- a/ggml/src/ggml-sycl/concat.cpp +++ b/ggml/src/ggml-sycl/concat.cpp @@ -158,8 +158,9 @@ static void concat_f32_sycl_non_cont( }); } -void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst) { +void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { + const ggml_tensor *src0 = dst->src[0]; + const ggml_tensor *src1 = dst->src[1]; queue_ptr stream = ctx.stream(); const int32_t dim = ((int32_t *)dst->op_params)[0]; diff --git a/ggml/src/ggml-sycl/concat.hpp b/ggml/src/ggml-sycl/concat.hpp index 5a04feaab..e5cb7314c 100644 --- a/ggml/src/ggml-sycl/concat.hpp +++ b/ggml/src/ggml-sycl/concat.hpp @@ -15,7 +15,6 @@ #include "common.hpp" -void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst); +void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst); #endif // GGML_SYCL_CONCAT_HPP diff --git a/ggml/src/ggml-sycl/conv.cpp b/ggml/src/ggml-sycl/conv.cpp index bc4ab1ddb..ddba601e1 100644 --- a/ggml/src/ggml-sycl/conv.cpp +++ b/ggml/src/ggml-sycl/conv.cpp @@ -71,8 +71,9 @@ static void conv_transpose_1d_f32_f32_sycl( }); } -void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst) { +void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { + const ggml_tensor *src0 = dst->src[0]; + const ggml_tensor *src1 = dst->src[1]; const float * src0_d = (const float *)src0->data; const float * src1_d = (const float *)src1->data; diff --git a/ggml/src/ggml-sycl/conv.hpp b/ggml/src/ggml-sycl/conv.hpp index eb20730f9..f9e60dc75 100644 --- a/ggml/src/ggml-sycl/conv.hpp +++ b/ggml/src/ggml-sycl/conv.hpp @@ -15,7 +15,6 @@ #include "common.hpp" -void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst); +void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst); #endif // GGML_SYCL_CONV_HPP diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index d05a51f80..4bcd74376 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -882,149 +882,149 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor } -void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqrt); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqrt); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sin); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sin); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_cos); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_cos); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_acc); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_acc); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_silu); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu_quick); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu_quick); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_tanh); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_tanh); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_relu); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_relu); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sigmoid); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sigmoid); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardsigmoid); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardsigmoid); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardswish); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardswish); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_exp); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_exp); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_log); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_log); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_neg); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_neg); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_step); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_step); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_leaky_relu); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_leaky_relu); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqr); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqr); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_upscale); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_upscale); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pad); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pad); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_add); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sub); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sub); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_mul); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_mul); GGML_SYCL_DEBUG("call %s done\n", __func__); } -void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_div); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_div); GGML_SYCL_DEBUG("call %s done\n", __func__); } diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp index 8152edf58..464432645 100644 --- a/ggml/src/ggml-sycl/element_wise.hpp +++ b/ggml/src/ggml-sycl/element_wise.hpp @@ -25,52 +25,52 @@ static __dpct_inline__ float op_div(const float a, const float b) { } -void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst); #endif // GGML_SYCL_ELEMENTWISE_HPP diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 312ccfeb8..a3779f180 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -1189,7 +1189,6 @@ std::unique_ptr ggml_backend_sycl_context::new_pool_for_device(q /// kernels typedef void (*cpy_kernel_t)(const char * cx, char * cdst); -typedef void (*ggml_sycl_func_t)(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); typedef void (*ggml_sycl_op_mul_mat_t)( ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, @@ -3171,33 +3170,33 @@ catch (sycl::exception const &exc) { } -static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_repeat); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_repeat); GGML_SYCL_DEBUG("call %s done\n", __func__); } -static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_get_rows); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_get_rows); GGML_SYCL_DEBUG("call %s done\n", __func__); } -static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_norm); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_norm); GGML_SYCL_DEBUG("call %s done\n", __func__); } -static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rms_norm); GGML_SYCL_DEBUG("call %s done\n", __func__); } -static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_group_norm); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_group_norm); GGML_SYCL_DEBUG("call %s done\n", __func__); } @@ -3572,9 +3571,10 @@ __dpct_inline__ static void k_copy_dst_from_contiguous( } } -static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, +static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, ggml_tensor *dst) try { + const ggml_tensor *src0 = dst->src[0]; + const ggml_tensor *src1 = dst->src[1]; GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers"); const ggml_tensor *ids = dst->src[2]; @@ -3740,12 +3740,12 @@ catch (sycl::exception const &exc) { std::exit(1); } -static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_scale); +static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_scale); } -static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_clamp); +static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_clamp); } static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -3787,8 +3787,6 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr ggml_type_name(src0->type), ggml_type_name(src1->type)); GGML_ABORT("fatal error"); } - - GGML_UNUSED(dst); } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__ @@ -3796,59 +3794,52 @@ catch (sycl::exception const &exc) { std::exit(1); } -static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { // TODO: why do we pass dst as src1 here? - ggml_sycl_cpy(ctx, src0, dst, nullptr); - GGML_UNUSED(src1); + ggml_sycl_cpy(ctx, dst->src[0], dst, nullptr); } -static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_diag_mask_inf); +static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_diag_mask_inf); } -static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_soft_max); +static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_soft_max); } -static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_ASSERT(ggml_is_contiguous(src0)); // TODO: this restriction is temporary until non-cont support is implemented - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rope); +static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(dst->src[0])); // TODO: this restriction is temporary until non-cont support is implemented + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rope); } -static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pool2d); +static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pool2d); } -static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_im2col); +static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_im2col); } -static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_ASSERT(ggml_is_contiguous(src0)); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum); +static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(dst->src[0])); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum); } -static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_ASSERT(ggml_is_contiguous(src0)); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum_rows); +static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(dst->src[0])); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum_rows); } -static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_ASSERT(ggml_is_contiguous(src0)); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argsort); +static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(dst->src[0])); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argsort); } -static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_ASSERT(ggml_is_contiguous(src0)); - ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argmax); +static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(dst->src[0])); + ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argmax); } -static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_UNUSED(src0); - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(ctx); -} void ggml_sycl_set_main_device(const int main_device) try { if (dpct::get_current_device_id() == static_cast (main_device)) { @@ -3871,191 +3862,189 @@ catch (sycl::exception const &exc) { std::exit(1); } -bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * tensor) { +bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * dst) { if (!g_sycl_loaded) return false; - ggml_sycl_func_t func; + if (dst->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(dst->src[0]->buffer)) { + ggml_sycl_set_peer_access(dst->src[1]->ne[1], ctx.device); + } - switch (tensor->op) { + switch (dst->op) { case GGML_OP_ARGMAX: - func = ggml_sycl_argmax; + ggml_sycl_argmax(ctx, dst); break; case GGML_OP_CONV_TRANSPOSE_1D: - func = ggml_sycl_op_conv_transpose_1d; + ggml_sycl_op_conv_transpose_1d(ctx, dst); break; case GGML_OP_REPEAT: - func = ggml_sycl_repeat; + ggml_sycl_repeat(ctx, dst); break; case GGML_OP_GET_ROWS: - func = ggml_sycl_get_rows; + ggml_sycl_get_rows(ctx, dst); break; case GGML_OP_DUP: - func = ggml_sycl_dup; + ggml_sycl_dup(ctx, dst); break; case GGML_OP_ADD: case GGML_OP_ADD1: // TODO: more efficient implementation - func = ggml_sycl_add; + ggml_sycl_add(ctx, dst); break; case GGML_OP_SUB: - func = ggml_sycl_sub; + ggml_sycl_sub(ctx, dst); break; case GGML_OP_ACC: - func = ggml_sycl_acc; + ggml_sycl_acc(ctx, dst); break; case GGML_OP_MUL: - func = ggml_sycl_mul; + ggml_sycl_mul(ctx, dst); break; case GGML_OP_LOG: - func = ggml_sycl_log; + ggml_sycl_log(ctx, dst); break; case GGML_OP_DIV: - func = ggml_sycl_div; + ggml_sycl_div(ctx, dst); break; case GGML_OP_UNARY: - switch (ggml_get_unary_op(tensor)) { + switch (ggml_get_unary_op(dst)) { case GGML_UNARY_OP_NEG: - func = ggml_sycl_neg; + ggml_sycl_neg(ctx, dst); break; case GGML_UNARY_OP_STEP: - func = ggml_sycl_step; + ggml_sycl_step(ctx, dst); break; case GGML_UNARY_OP_GELU: - func = ggml_sycl_gelu; + ggml_sycl_gelu(ctx, dst); break; case GGML_UNARY_OP_SILU: - func = ggml_sycl_silu; + ggml_sycl_silu(ctx, dst); break; case GGML_UNARY_OP_GELU_QUICK: - func = ggml_sycl_gelu_quick; + ggml_sycl_gelu_quick(ctx, dst); break; case GGML_UNARY_OP_TANH: - func = ggml_sycl_tanh; + ggml_sycl_tanh(ctx, dst); break; case GGML_UNARY_OP_RELU: - func = ggml_sycl_relu; + ggml_sycl_relu(ctx, dst); break; case GGML_UNARY_OP_SIGMOID: - func = ggml_sycl_sigmoid; + ggml_sycl_sigmoid(ctx, dst); break; case GGML_UNARY_OP_HARDSIGMOID: - func = ggml_sycl_hardsigmoid; + ggml_sycl_hardsigmoid(ctx, dst); break; case GGML_UNARY_OP_HARDSWISH: - func = ggml_sycl_hardswish; + ggml_sycl_hardswish(ctx, dst); break; case GGML_UNARY_OP_EXP: - func = ggml_sycl_exp; + ggml_sycl_exp(ctx, dst); break; default: return false; } break; case GGML_OP_NORM: - func = ggml_sycl_norm; + ggml_sycl_norm(ctx, dst); break; case GGML_OP_GROUP_NORM: - func = ggml_sycl_group_norm; + ggml_sycl_group_norm(ctx, dst); break; case GGML_OP_CONCAT: - func = ggml_sycl_op_concat; + ggml_sycl_op_concat(ctx, dst); break; case GGML_OP_UPSCALE: - func = ggml_sycl_upscale; + ggml_sycl_upscale(ctx, dst); break; case GGML_OP_PAD: - func = ggml_sycl_pad; + ggml_sycl_pad(ctx, dst); break; case GGML_OP_LEAKY_RELU: - func = ggml_sycl_leaky_relu; + ggml_sycl_leaky_relu(ctx, dst); break; case GGML_OP_RMS_NORM: - func = ggml_sycl_rms_norm; + ggml_sycl_rms_norm(ctx, dst); break; case GGML_OP_MUL_MAT: - if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) { + if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) { return false; } - func = ggml_sycl_mul_mat; + /* ggml_sycl_mul_mat_id is dependent on ggml_sycl_mul_mat */ + ggml_sycl_mul_mat(ctx, dst->src[0], dst->src[1], dst); break; case GGML_OP_MUL_MAT_ID: - if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) { + if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) { return false; } - func = ggml_sycl_mul_mat_id; + ggml_sycl_mul_mat_id(ctx, dst); break; case GGML_OP_OUT_PROD: - func = ggml_sycl_op_out_prod; + ggml_sycl_op_out_prod(ctx, dst); break; case GGML_OP_SCALE: - func = ggml_sycl_scale; + ggml_sycl_scale(ctx, dst); break; case GGML_OP_SQR: - func = ggml_sycl_sqr; + ggml_sycl_sqr(ctx, dst); break; case GGML_OP_SQRT: - func = ggml_sycl_sqrt; + ggml_sycl_sqrt(ctx, dst); break; case GGML_OP_SIN: - func = ggml_sycl_sin; + ggml_sycl_sin(ctx, dst); break; case GGML_OP_COS: - func = ggml_sycl_cos; + ggml_sycl_cos(ctx, dst); break; case GGML_OP_CLAMP: - func = ggml_sycl_clamp; + ggml_sycl_clamp(ctx, dst); break; case GGML_OP_CPY: - func = ggml_sycl_cpy; + ggml_sycl_cpy(ctx, dst->src[0], dst->src[1], dst); break; case GGML_OP_CONT: - func = ggml_sycl_dup; + ggml_sycl_dup(ctx, dst); break; case GGML_OP_NONE: case GGML_OP_RESHAPE: case GGML_OP_VIEW: case GGML_OP_PERMUTE: case GGML_OP_TRANSPOSE: - func = ggml_sycl_nop; + GGML_SYCL_DEBUG("Tensor NO-OP\n"); break; case GGML_OP_DIAG_MASK_INF: - func = ggml_sycl_diag_mask_inf; + ggml_sycl_diag_mask_inf(ctx, dst); break; case GGML_OP_SOFT_MAX: - func = ggml_sycl_soft_max; + ggml_sycl_soft_max(ctx, dst); break; case GGML_OP_ROPE: - func = ggml_sycl_rope; + ggml_sycl_rope(ctx, dst); break; case GGML_OP_IM2COL: - func = ggml_sycl_im2col; + ggml_sycl_im2col(ctx, dst); break; case GGML_OP_POOL_2D: - func = ggml_sycl_pool2d; + ggml_sycl_pool2d(ctx, dst); break; case GGML_OP_SUM: - func = ggml_sycl_sum; + ggml_sycl_sum(ctx, dst); break; case GGML_OP_SUM_ROWS: - func = ggml_sycl_sum_rows; + ggml_sycl_sum_rows(ctx, dst); break; case GGML_OP_ARGSORT: - func = ggml_sycl_argsort; + ggml_sycl_argsort(ctx, dst); break; case GGML_OP_TIMESTEP_EMBEDDING: - func = ggml_sycl_op_timestep_embedding; + ggml_sycl_op_timestep_embedding(ctx, dst); break; case GGML_OP_RWKV_WKV6: - func = ggml_sycl_op_rwkv_wkv6; + ggml_sycl_op_rwkv_wkv6(ctx, dst); break; default: return false; } - if (tensor->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(tensor->src[0]->buffer)) { - ggml_sycl_set_peer_access(tensor->src[1]->ne[1], ctx.device); - } - - func(ctx, tensor->src[0], tensor->src[1], tensor); return true; } diff --git a/ggml/src/ggml-sycl/outprod.cpp b/ggml/src/ggml-sycl/outprod.cpp index ef9af0b76..8e8347ff4 100644 --- a/ggml/src/ggml-sycl/outprod.cpp +++ b/ggml/src/ggml-sycl/outprod.cpp @@ -3,9 +3,9 @@ #include "outprod.hpp" -void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, - const ggml_tensor* src1, ggml_tensor* dst) { - +void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { + const ggml_tensor *src0 = dst->src[0]; + const ggml_tensor *src1 = dst->src[1]; GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT(src1->type == GGML_TYPE_F32); diff --git a/ggml/src/ggml-sycl/outprod.hpp b/ggml/src/ggml-sycl/outprod.hpp index 9c042738a..f50413d3f 100644 --- a/ggml/src/ggml-sycl/outprod.hpp +++ b/ggml/src/ggml-sycl/outprod.hpp @@ -3,8 +3,7 @@ #include "common.hpp" -void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, - const ggml_tensor* src1, ggml_tensor* dst); +void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst); #endif // GGML_SYCL_OUTPROD_HPP diff --git a/ggml/src/ggml-sycl/tsembd.cpp b/ggml/src/ggml-sycl/tsembd.cpp index 2ffe3cca9..b877d18c1 100644 --- a/ggml/src/ggml-sycl/tsembd.cpp +++ b/ggml/src/ggml-sycl/tsembd.cpp @@ -55,8 +55,9 @@ static void timestep_embedding_f32_sycl( }); } -void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor * dst) { +void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + const ggml_tensor *src0 = dst->src[0]; + const ggml_tensor *src1 = dst->src[1]; const float * src0_d = (const float *)src0->data; float * dst_d = (float *)dst->data; dpct::queue_ptr stream = ctx.stream(); diff --git a/ggml/src/ggml-sycl/tsembd.hpp b/ggml/src/ggml-sycl/tsembd.hpp index ff854c337..4c18748bb 100644 --- a/ggml/src/ggml-sycl/tsembd.hpp +++ b/ggml/src/ggml-sycl/tsembd.hpp @@ -15,7 +15,6 @@ #include "common.hpp" -void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor * dst); +void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst); #endif // GGML_SYCL_TSEMBD_HPP diff --git a/ggml/src/ggml-sycl/wkv6.cpp b/ggml/src/ggml-sycl/wkv6.cpp index 105db6f03..4fed18c2a 100644 --- a/ggml/src/ggml-sycl/wkv6.cpp +++ b/ggml/src/ggml-sycl/wkv6.cpp @@ -95,8 +95,10 @@ static void rwkv_wkv_f32_kernel( } } -void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, - const ggml_tensor* src1, ggml_tensor* dst) { +void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { + + const ggml_tensor *src0 = dst->src[0]; + const ggml_tensor *src1 = dst->src[1]; const float* k_d = (const float*)dst->src[0]->data; const float* v_d = (const float*)dst->src[1]->data; diff --git a/ggml/src/ggml-sycl/wkv6.hpp b/ggml/src/ggml-sycl/wkv6.hpp index ddfa3377b..8c596a997 100644 --- a/ggml/src/ggml-sycl/wkv6.hpp +++ b/ggml/src/ggml-sycl/wkv6.hpp @@ -3,8 +3,7 @@ #include "common.hpp" -void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor * dst); +void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, ggml_tensor * dst); #endif // GGML_SYCL_WKV6_HPP