SYCL: remove ggml_sycl_op_flatten function

Author: Akarshan Biswas
Date:   2025-01-30 19:46:34 +05:30
parent 9f4cc8f8d3
commit 2d72bd94b0
No known key found for this signature in database. GPG key ID: 52A578A14B32134D
12 changed files with 469 additions and 692 deletions
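Across all twelve files the change is the same inversion of control: instead of routing every element-wise op through a generic launcher that unpacked device pointers and forwarded them to a callback, each op now takes only (ctx, dst) and recovers its sources from dst->src[] and its queue from the context. A minimal sketch of the two calling conventions; the callback typedef is not shown in this excerpt, so its exact shape is inferred from the removed launcher's op(...) call:

    // Before: ops matched this callback type and were launched through the helper.
    typedef void (*ggml_sycl_op_flatten_t)(ggml_backend_sycl_context & ctx,
                                           const ggml_tensor * src0, const ggml_tensor * src1,
                                           ggml_tensor * dst, const float * src0_dd,
                                           const float * src1_dd, float * dst_dd,
                                           const queue_ptr & main_stream);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu);

    // After: each op reads dst->src[0]/dst->src[1] and ctx.stream() itself.
    ggml_sycl_op_silu(ctx, dst);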


@@ -65,37 +65,3 @@ int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block
     }
     return sycl_down_blk_size;
 }
-
-void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                          const ggml_tensor *src1, ggml_tensor *dst,
-                          const ggml_sycl_op_flatten_t op) try {
-    const bool use_src1 = src1 != nullptr;
-    if(use_src1)
-        GGML_ASSERT(strcmp(src1->buffer->buft->iface.get_name(src1->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
-
-    // dd = data device
-    float * src0_ddf = (float *) src0->data;
-    float * src1_ddf = use_src1 ? (float *) src1->data : nullptr;
-    float *  dst_ddf = (float *) dst->data;
-
-    ggml_sycl_pool_alloc<float> src0_f(ctx.pool());
-    ggml_sycl_pool_alloc<float> src1_f(ctx.pool());
-    ggml_sycl_pool_alloc<float> dst_f(ctx.pool());
-
-    ggml_sycl_set_device(ctx.device);
-    queue_ptr main_stream = ctx.stream();
-    // GGML_SYCL_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n",
-    //     ctx.device, main_stream, src0_on_device, src1_on_device, dst_on_device);
-
-    // do the computation
-    op(ctx, src0, src1, dst, src0_ddf, src1_ddf, dst_ddf, main_stream);
-    // print_ggml_tensor("tensor", dst);
-}
-catch (sycl::exception const &exc) {
-  std::cerr << exc.what() << "Exception caught at file:" << __FILE__
-            << ", line:" << __LINE__ << std::endl;
-  std::exit(1);
-}
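Note that the three ggml_sycl_pool_alloc<float> guards in the deleted launcher never allocated anything, so removing the launcher also removes dead code. For context, a sketch of how this RAII helper is used when it does real work; the alloc() member shown here is assumed from the helper's use elsewhere in the backend:

    ggml_sycl_pool_alloc<float> tmp(ctx.pool());  // guard constructed, no device memory yet
    float * buf = tmp.alloc(n);                   // would draw n floats from the context pool
    // ... enqueue kernels that use buf ...
    // the allocation is returned to the pool when tmp is destroyed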


@@ -677,8 +677,17 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
 bool gpu_has_xmx(sycl::device &dev);
 
-void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                          const ggml_tensor *src1, ggml_tensor *dst,
-                          const ggml_sycl_op_flatten_t op);
+// Some backend specific macros
+#define GGML_SYCL_TENSOR_BINARY_OP_LOCALS \
+    GGML_TENSOR_LOCALS(int64_t, ne0, dst->src[0], ne) \
+    GGML_TENSOR_LOCALS(size_t, nb0, dst->src[0], nb) \
+    GGML_TENSOR_LOCALS(int64_t, ne1, dst->src[1], ne) \
+    GGML_TENSOR_LOCALS(size_t, nb1, dst->src[1], nb) \
+    GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
+    GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
+
+#define GGML_SYCL_TENSOR_BINARY_OP_CP_LOCALS \
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+    GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
+    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
+    GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
 
 #endif // GGML_SYCL_COMMON_HPP
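GGML_TENSOR_LOCALS(type, prefix, pointer, array) from ggml.h declares four locals prefix0..prefix3 out of pointer->array[0..3] (each wrapped in GGML_UNUSED to silence warnings), so GGML_SYCL_TENSOR_BINARY_OP_LOCALS brings ne00..ne03/nb00..nb03 for dst->src[0], ne10..ne13/nb10..nb13 for dst->src[1], and ne0..ne3/nb0..nb3 for dst into scope. A self-contained toy showing the expansion shape, with a stub struct standing in for ggml_tensor:

    #include <cstdint>
    #include <cstdio>

    struct toy_tensor { int64_t ne[4]; size_t nb[4]; toy_tensor * src[2]; };

    #define TOY_TENSOR_LOCALS(type, prefix, pointer, array) \
        const type prefix##0 = (pointer)->array[0]; \
        const type prefix##1 = (pointer)->array[1]; \
        const type prefix##2 = (pointer)->array[2]; \
        const type prefix##3 = (pointer)->array[3];

    int main() {
        toy_tensor src0 = {{8, 4, 2, 1}, {4, 32, 128, 256}, {}};
        toy_tensor dst  = {{8, 4, 2, 1}, {4, 32, 128, 256}, {&src0, nullptr}};
        TOY_TENSOR_LOCALS(int64_t, ne0, dst.src[0], ne)  // declares ne00..ne03
        std::printf("ne00=%lld ne01=%lld\n", (long long)ne00, (long long)ne01);
        return (void)ne02, (void)ne03, 0;  // ggml uses GGML_UNUSED for this
    }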


@@ -1,5 +1,6 @@
 #include "common.hpp"
 #include "element_wise.hpp"
+#include "ggml.h"
 
 void acc_f32(const float * x, const float * y, float * dst, const int ne,
              const int ne10, const int ne11, const int ne12,
@@ -509,497 +510,410 @@ void pad_f32_sycl(const float *x, float *dst, const int ne00,
         });
 }
 
-inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    log_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    log_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    step_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
+    step_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
+    neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
     float negative_slope;
     memcpy(&negative_slope, dst->op_params, sizeof(float));
 
-    leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), negative_slope, main_stream);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
+    leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), negative_slope, main_stream);
 }
 
-inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
+    sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
-    const float sf0 = (float)dst->ne[0]/src0->ne[0];
-    const float sf1 = (float)dst->ne[1]/src0->ne[1];
-    const float sf2 = (float)dst->ne[2]/src0->ne[2];
-    const float sf3 = (float)dst->ne[3]/src0->ne[3];
+    const float sf0 = (float)dst->ne[0]/dst->src[0]->ne[0];
+    const float sf1 = (float)dst->ne[1]/dst->src[0]->ne[1];
+    const float sf2 = (float)dst->ne[2]/dst->src[0]->ne[2];
+    const float sf3 = (float)dst->ne[3]/dst->src[0]->ne[3];
 
-    upscale_f32_sycl(src0_dd, dst_dd, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
+    upscale_f32_sycl(src0_dd, dst_dd, dst->src[0]->nb[0], dst->src[0]->nb[1], dst->src[0]->nb[2], dst->src[0]->nb[3],
                      dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
                      main_stream);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
 }
 
-inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors
+inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->src[0]->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors
 
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
     pad_f32_sycl(src0_dd, dst_dd,
-                 src0->ne[0], src0->ne[1], src0->ne[2],
+                 dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2],
                  dst->ne[0], dst->ne[1], dst->ne[2], main_stream);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
 }
 
-inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT(src1->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->ne[3] == 1); // just 3D tensors supported
 
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    const float * src1_dd = static_cast<const float *>(dst->src[1]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+
     int nb1 = dst->op_params[0] / 4; // 4 bytes of float32
     int nb2 = dst->op_params[1] / 4; // 4 bytes of float32
     // int nb3 = dst->op_params[2] / 4; // 4 bytes of float32 - unused
     int offset = dst->op_params[3] / 4; // offset in bytes
 
-    acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream);
-
-    GGML_UNUSED(dst);
-    GGML_UNUSED(ctx);
+    acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), dst->src[1]->ne[0], dst->src[1]->ne[1], dst->src[1]->ne[2], nb1, nb2, offset, main_stream);
 }
 
-inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_add>>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream);
+inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    // TODO: remove duplicate variables
+    const float * src0_dd = static_cast<float *>(dst->src[0]->data);
+    const float * src1_dd = static_cast<float *>(dst->src[1]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_add>>(ctx, dst->src[0], dst->src[1], dst, src0_dd, src1_dd, dst_dd, main_stream);
 }
 
-inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_sub>>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream);
+inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    // TODO: remove duplicate variables
+    const float * src0_dd = static_cast<float *>(dst->src[0]->data);
+    const float * src1_dd = static_cast<float *>(dst->src[1]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_sub>>(ctx, dst->src[0], dst->src[1], dst, src0_dd, src1_dd, dst_dd, main_stream);
 }
 
-inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_mul>>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream);
+inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    // TODO: remove duplicate variables
+    const float * src0_dd = static_cast<float *>(dst->src[0]->data);
+    const float * src1_dd = static_cast<float *>(dst->src[1]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_mul>>(ctx, dst->src[0], dst->src[1], dst, src0_dd, src1_dd, dst_dd, main_stream);
 }
 
-inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_div>>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream);
+inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    // TODO: remove duplicate variables
+    const float * src0_dd = static_cast<float *>(dst->src[0]->data);
+    const float * src1_dd = static_cast<float *>(dst->src[1]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_div>>(ctx, dst->src[0], dst->src[1], dst, src0_dd, src1_dd, dst_dd, main_stream);
 }
 
 void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqrt);
+    ggml_sycl_op_sqrt(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sin);
+    ggml_sycl_op_sin(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_cos);
+    ggml_sycl_op_cos(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_acc);
+    ggml_sycl_op_acc(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu);
+    ggml_sycl_op_gelu(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu);
+    ggml_sycl_op_silu(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu_quick);
+    ggml_sycl_op_gelu_quick(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_tanh);
+    ggml_sycl_op_tanh(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_relu);
+    ggml_sycl_op_relu(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sigmoid);
+    ggml_sycl_op_sigmoid(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardsigmoid);
+    ggml_sycl_op_hardsigmoid(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardswish);
+    ggml_sycl_op_hardswish(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_exp);
+    ggml_sycl_op_exp(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_log);
+    ggml_sycl_op_log(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_neg);
+    ggml_sycl_op_neg(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_step);
+    ggml_sycl_op_step(ctx, dst);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_leaky_relu);
+    ggml_sycl_op_leaky_relu(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqr);
+    ggml_sycl_op_sqr(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_upscale);
+    ggml_sycl_op_upscale(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pad);
+    ggml_sycl_op_pad(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
@@ -1007,24 +921,24 @@ void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
 
 void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add);
+    ggml_sycl_op_add(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sub);
+    ggml_sycl_op_sub(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_mul);
+    ggml_sycl_op_mul(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_div);
+    ggml_sycl_op_div(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }


@@ -1897,12 +1897,9 @@ static void pool2d_nchw_kernel(
 }
 
 template <int qk, int qr, dequantize_kernel_t dq>
-static void get_rows_sycl(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
-                          ggml_tensor *dst, const void *src0_dd,
-                          const int32_t *src1_dd, float *dst_dd,
-                          queue_ptr stream) {
+static void get_rows_sycl(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
-    GGML_TENSOR_BINARY_OP_LOCALS
+    GGML_SYCL_TENSOR_BINARY_OP_LOCALS
 
     const sycl::range<3> block_dims(1, 1, SYCL_GET_ROWS_BLOCK_SIZE);
     const int block_num_x = (ne00 + 2*SYCL_GET_ROWS_BLOCK_SIZE - 1) / (2*SYCL_GET_ROWS_BLOCK_SIZE);
@@ -1914,12 +1911,17 @@ static void get_rows_sycl(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
     const size_t s2 = nb2 / ggml_element_size(dst);
     const size_t s3 = nb3 / ggml_element_size(dst);
 
-    const size_t s10 = nb10 / ggml_element_size(src1);
-    const size_t s11 = nb11 / ggml_element_size(src1);
-    const size_t s12 = nb12 / ggml_element_size(src1);
-    //const size_t s13 = nb13 / ggml_element_size(src1);
+    const size_t s10 = nb10 / ggml_element_size(dst->src[1]);
+    const size_t s11 = nb11 / ggml_element_size(dst->src[1]);
+    const size_t s12 = nb12 / ggml_element_size(dst->src[1]);
+    //const size_t s13 = nb13 / ggml_element_size(dst->src[1]);
 
     GGML_ASSERT(ne00 % 2 == 0);
 
+    const void * src0_dd = dst->src[0]->data;
+    const int32_t * src1_dd = static_cast<const int32_t *>(dst->src[1]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr stream = ctx.stream();
+
     stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
                          [=](sycl::nd_item<3> item_ct1) {
@@ -1928,17 +1930,12 @@ static void get_rows_sycl(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
                              s3, nb01, nb02, nb03, s10, s11, s12, item_ct1);
                          });
-
-    GGML_UNUSED(dst);
-    GGML_UNUSED(ctx);
 }
 
 template <typename src0_t>
-static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                const ggml_tensor *src1, ggml_tensor *dst,
-                                const src0_t *src0_dd, const int32_t *src1_dd,
-                                float *dst_dd, queue_ptr stream) {
+static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
 
-    GGML_TENSOR_BINARY_OP_LOCALS
+    GGML_SYCL_TENSOR_BINARY_OP_LOCALS
 
     const sycl::range<3> block_dims(1, 1, SYCL_GET_ROWS_BLOCK_SIZE);
     const int block_num_x = (ne00 + SYCL_GET_ROWS_BLOCK_SIZE - 1) / SYCL_GET_ROWS_BLOCK_SIZE;
@@ -1950,10 +1947,15 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens
     const size_t s2 = nb2 / ggml_element_size(dst);
     const size_t s3 = nb3 / ggml_element_size(dst);
 
-    const size_t s10 = nb10 / ggml_element_size(src1);
-    const size_t s11 = nb11 / ggml_element_size(src1);
-    const size_t s12 = nb12 / ggml_element_size(src1);
-    //const size_t s13 = nb13 / ggml_element_size(src1);
+    const size_t s10 = nb10 / ggml_element_size(dst->src[1]);
+    const size_t s11 = nb11 / ggml_element_size(dst->src[1]);
+    const size_t s12 = nb12 / ggml_element_size(dst->src[1]);
+    //const size_t s13 = nb13 / ggml_element_size(dst->src[1]);
+
+    const src0_t * src0_dd = static_cast<const src0_t *>(dst->src[0]->data);
+    const int32_t * src1_dd = static_cast<const int32_t *>(dst->src[1]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr stream = ctx.stream();
 
     {
         dpct::has_capability_or_fail(stream->get_device(),
@@ -1966,9 +1968,6 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens
                                  s3, nb01, nb02, nb03, s10, s11, s12, item_ct1);
                              });
     }
-
-    GGML_UNUSED(dst);
-    GGML_UNUSED(ctx);
 }
 
 static void quantize_row_q8_1_sycl(const float *x, void *vy, const int kx,
@@ -2494,62 +2493,53 @@ catch (sycl::exception const &exc) {
   std::exit(1);
 }
 
-static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                  const ggml_tensor *src1, ggml_tensor *dst,
-                                  const float *src0_d, const float *src1_d,
-                                  float *dst_d, const queue_ptr &stream) {
+static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
-    GGML_ASSERT(src1->type == GGML_TYPE_I32);
+    GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
-    GGML_ASSERT(src0->nb[0] == ggml_type_size(src0->type));
-    GGML_ASSERT(src1->nb[0] == ggml_type_size(src1->type));
+    GGML_ASSERT(dst->src[0]->nb[0] == ggml_type_size(dst->src[0]->type));
+    GGML_ASSERT(dst->src[1]->nb[0] == ggml_type_size(dst->src[1]->type));
     GGML_ASSERT(dst->nb[0] == ggml_type_size(dst->type));
 
-    const int32_t * src1_i32 = (const int32_t *) src1_d;
-
-    switch (src0->type) {
+    switch (dst->src[0]->type) {
         case GGML_TYPE_F16:
-            get_rows_sycl_float(ctx, src0, src1, dst, (const sycl::half *)src0_d,
-                                src1_i32, dst_d, stream);
+            get_rows_sycl_float<sycl::half>(ctx, dst);
             break;
         case GGML_TYPE_F32:
-            get_rows_sycl_float(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl_float<float>(ctx, dst);
            break;
         case GGML_TYPE_Q4_0:
-            get_rows_sycl<QK4_0, QR4_0, dequantize_q4_0>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl<QK4_0, QR4_0, dequantize_q4_0>(ctx, dst);
             break;
         case GGML_TYPE_Q4_1:
-            get_rows_sycl<QK4_1, QR4_1, dequantize_q4_1>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl<QK4_1, QR4_1, dequantize_q4_1>(ctx, dst);
             break;
         case GGML_TYPE_Q5_0:
-            get_rows_sycl<QK5_0, QR5_0, dequantize_q5_0>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl<QK5_0, QR5_0, dequantize_q5_0>(ctx, dst);
             break;
         case GGML_TYPE_Q5_1:
-            get_rows_sycl<QK5_1, QR5_1, dequantize_q5_1>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl<QK5_1, QR5_1, dequantize_q5_1>(ctx, dst);
             break;
         case GGML_TYPE_Q8_0:
-            get_rows_sycl<QK8_0, QR8_0, dequantize_q8_0>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl<QK8_0, QR8_0, dequantize_q8_0>(ctx, dst);
             break;
         default:
             // TODO: k-quants
-            GGML_LOG_ERROR("%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
+            GGML_LOG_ERROR("%s: unsupported type: %s\n", __func__, ggml_type_name(dst->src[0]->type));
             GGML_ABORT("fatal error");
             break;
     }
 }
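With the pointer arguments gone, src0_t can no longer be deduced from a call argument, which is why the F16 and F32 cases above must spell out get_rows_sycl_float<sycl::half> and get_rows_sycl_float<float>. A minimal illustration of the deduction rule:

    template <typename src0_t>
    void read_rows(const void * p) {
        // the element type is only known through the template argument
        const src0_t * src = static_cast<const src0_t *>(p);
        (void)src;
    }
    // read_rows(ptr);         // error: src0_t is non-deducible from const void *
    // read_rows<float>(ptr);  // OK: explicit, as in the switch above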
-static void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                const ggml_tensor *src1, ggml_tensor *dst,
-                                const float *src0_d, const float *src1_d,
-                                float *dst_d,
-                                const queue_ptr &main_stream) {
-
-    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_repeat>>(ctx, dst, src0, dst, nullptr, src0_d, dst_d, main_stream);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(src1_d);
+static void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+    // TODO: remove duplicate variables
+    const float * src0_d = static_cast<float *>(dst->src[0]->data);
+    float * dst_d = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
+
+    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_repeat>>(ctx, dst, dst->src[0], dst, nullptr, src0_d, dst_d, main_stream);
 }
@@ -2685,13 +2675,10 @@ catch (sycl::exception const &exc) {
   std::exit(1);
 }
 
-static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                const ggml_tensor *src1, ggml_tensor *dst,
-                                const float *src0_dd, const float *src1_dd,
-                                float *dst_dd, const queue_ptr &main_stream) {
+static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
     const int32_t * opts = (const int32_t *)dst->op_params;
     enum ggml_op_pool op = static_cast<ggml_op_pool>(opts[0]);
@@ -2702,8 +2689,8 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
     const int p0 = opts[5];
     const int p1 = opts[6];
 
-    const int64_t IH = src0->ne[1];
-    const int64_t IW = src0->ne[0];
+    const int64_t IH = dst->src[0]->ne[1];
+    const int64_t IW = dst->src[0]->ne[0];
 
     const int64_t N = dst->ne[3];
     const int64_t OC = dst->ne[2];
@@ -2712,7 +2699,10 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
     const int parallel_elements = N * OC * OH * OW;
     const int num_blocks = (parallel_elements + SYCL_POOL2D_BLOCK_SIZE - 1) / SYCL_POOL2D_BLOCK_SIZE;
-    sycl::range<3> block_nums(1, 1, num_blocks);
+    dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    sycl::range<3> block_nums(1, 1, num_blocks);
     main_stream->parallel_for(
         sycl::nd_range<3>(block_nums *
                           sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE),
@@ -2722,163 +2712,122 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
                              parallel_elements, src0_dd, dst_dd, op,
                              item_ct1);
         });
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
 }
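For reference, GGML_OP_POOL_2D packs its parameters into dst->op_params as seven int32 values; the code above shows opts[0], opts[5] and opts[6], and the full layout (as used by ggml's pool_2d on other backends) is:

    const int32_t * opts = (const int32_t *) dst->op_params;
    enum ggml_op_pool op = static_cast<ggml_op_pool>(opts[0]);  // GGML_OP_POOL_AVG / _MAX
    const int k0 = opts[1], k1 = opts[2];                       // kernel extent (w, h)
    const int s0 = opts[3], s1 = opts[4];                       // stride
    const int p0 = opts[5], p1 = opts[6];                       // padding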
-inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
-    const int64_t ne = ggml_nelements(src0);
+    const int64_t ne = ggml_nelements(dst->src[0]);
+    dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
 
     sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
 }
 
-inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
-    const int64_t ncols = src0->ne[0];
-    const int64_t nrows = ggml_nrows(src0);
+    const int64_t ncols = dst->src[0]->ne[0];
+    const int64_t nrows = ggml_nrows(dst->src[0]);
+    dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
 
     sum_rows_f32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
 }
 
-inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_I32);
+inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_I32);
 
-    const int64_t ncols = src0->ne[0];
-    const int64_t nrows = ggml_nrows(src0);
+    const int64_t ncols = dst->src[0]->ne[0];
+    const int64_t nrows = ggml_nrows(dst->src[0]);
 
     enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0];
 
-    argsort_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, order, main_stream);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+    dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    int32_t * dst_dd = static_cast<int32_t *>(dst->data);
+    argsort_f32_i32_sycl(src0_dd, dst_dd, ncols, nrows, order, main_stream);
 }
 
-inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_I32);
+inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_I32);
 
-    const int64_t ncols = src0->ne[0];
-    const int64_t nrows = ggml_nrows(src0);
+    const int64_t ncols = dst->src[0]->ne[0];
+    const int64_t nrows = ggml_nrows(dst->src[0]);
 
-    argmax_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, main_stream);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+    dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    int32_t * dst_dd = static_cast<int32_t *>(dst->data);
+    argmax_f32_i32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
 }
 
-inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
-    const int64_t ne00 = src0->ne[0];
-    const int64_t ne01 = src0->ne[1];
-    const int nrows0 = ggml_nrows(src0);
+    const int64_t ne00 = dst->src[0]->ne[0];
+    const int64_t ne01 = dst->src[0]->ne[1];
+    const int nrows0 = ggml_nrows(dst->src[0]);
 
     const int n_past = ((int32_t *) dst->op_params)[0];
 
+    dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
     diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
 }
 
-inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
     float scale;
     memcpy(&scale, dst->op_params, sizeof(float));
 
-    scale_f32_sycl(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream);
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
+    scale_f32_sycl(src0_dd, dst_dd, scale, ggml_nelements(dst->src[0]), main_stream);
     /*
     DPCT1010:87: SYCL uses exceptions to report errors and does not use the
     error codes. The call was replaced with 0. You need to rewrite this code.
     */
     SYCL_CHECK(0);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
 }
 
-inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
     float min;
     float max;
     memcpy(&min, dst->op_params, sizeof(float));
     memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
 
-    clamp_f32_sycl(src0_dd, dst_dd, min, max, ggml_nelements(src0), main_stream);
+    const dpct::queue_ptr main_stream = ctx.stream();
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    clamp_f32_sycl(src0_dd, dst_dd, min, max, ggml_nelements(dst->src[0]), main_stream);
     /*
     DPCT1010:88: SYCL uses exceptions to report errors and does not use the
     error codes. The call was replaced with 0. You need to rewrite this code.
     */
     SYCL_CHECK(0);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
 }
 
 static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) {
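dst->op_params is an int32 array, so float-valued parameters (scale's factor, clamp's min/max above) are recovered with memcpy rather than a pointer cast, which would violate strict aliasing. A self-contained round-trip illustration:

    #include <cstring>
    #include <cstdint>
    #include <cstdio>

    int main() {
        int32_t op_params[4] = {0};
        const float min = -1.0f, max = 1.0f;
        std::memcpy(&op_params[0], &min, sizeof(float));  // pack, as ggml does at graph build
        std::memcpy(&op_params[1], &max, sizeof(float));
        float lo, hi;
        std::memcpy(&lo, &op_params[0], sizeof(float));   // unpack, as the SYCL op does
        std::memcpy(&hi, &op_params[1], sizeof(float));
        std::printf("clamp to [%g, %g]\n", lo, hi);
    }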
@@ -3247,33 +3196,21 @@ catch (sycl::exception const &exc) {
 }
 
-static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_repeat);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
-}
-
-static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_get_rows);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
-}
-
 static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_norm);
+    ggml_sycl_op_norm(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rms_norm);
+    ggml_sycl_op_rms_norm(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_group_norm);
+    ggml_sycl_op_group_norm(ctx, dst);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
@@ -3646,7 +3583,7 @@ __dpct_inline__ static void k_copy_dst_from_contiguous(
 }

 static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
-                                 ggml_tensor *dst) try {
+                                 ggml_tensor * dst) try {
     const ggml_tensor *src0 = dst->src[0];
     const ggml_tensor *src1 = dst->src[1];

     GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers");
@@ -3815,22 +3752,21 @@ catch (sycl::exception const &exc) {
 }
 static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_scale);
+    ggml_sycl_op_scale(ctx, dst);
 }

 static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_clamp);
+    ggml_sycl_op_clamp(ctx, dst);
 }

-static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
-                          ggml_tensor *dst) try {
+static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
     const int64_t ne = ggml_nelements(src0);
     GGML_ASSERT(ne == ggml_nelements(src1));

     GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
     GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);

-    GGML_TENSOR_BINARY_OP_LOCALS01;
+    GGML_SYCL_TENSOR_BINARY_OP_CP_LOCALS;

     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
     queue_ptr main_stream = ctx.stream();
@@ -3861,7 +3797,6 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
                 ggml_type_name(src0->type), ggml_type_name(src1->type));
         GGML_ABORT("fatal error");
     }
-    GGML_UNUSED(dst);
 }
 catch (sycl::exception const &exc) {
     std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -3871,44 +3806,39 @@ catch (sycl::exception const &exc) {

 static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     // TODO: why do we pass dst as src1 here?
-    ggml_sycl_cpy(ctx, dst->src[0], dst, nullptr);
+    ggml_sycl_cpy(ctx, dst->src[0], dst);
 }

 static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_diag_mask_inf);
+    ggml_sycl_op_diag_mask_inf(ctx, dst);
 }

 static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(ggml_is_contiguous(dst->src[0])); // TODO: this restriction is temporary until non-cont support is implemented
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rope);
+    ggml_sycl_op_rope(ctx, dst);
 }

 static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pool2d);
+    ggml_sycl_op_pool2d(ctx, dst);
 }

 static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_im2col);
+    ggml_sycl_op_im2col(ctx, dst);
 }

 static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum);
+    ggml_sycl_op_sum(ctx, dst);
 }

 static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum_rows);
+    ggml_sycl_op_sum_rows(ctx, dst);
 }

 static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argsort);
-}
-
-static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
-    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argmax);
+    ggml_sycl_op_argsort(ctx, dst);
 }
@@ -3942,138 +3872,138 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
     switch (dst->op) {
         case GGML_OP_ARGMAX:
-            ggml_sycl_argmax(ctx, dst);
+            ggml_sycl_op_argmax(ctx, dst); // done
             break;
         case GGML_OP_CONV_TRANSPOSE_1D:
-            ggml_sycl_op_conv_transpose_1d(ctx, dst);
+            ggml_sycl_op_conv_transpose_1d(ctx, dst); // already good
             break;
         case GGML_OP_REPEAT:
-            ggml_sycl_repeat(ctx, dst);
+            ggml_sycl_op_repeat(ctx, dst); // partially done
             break;
         case GGML_OP_GET_ROWS:
-            ggml_sycl_get_rows(ctx, dst);
+            ggml_sycl_op_get_rows(ctx, dst); // done
             break;
         case GGML_OP_DUP:
-            ggml_sycl_dup(ctx, dst);
+            ggml_sycl_dup(ctx, dst); // done
             break;
         case GGML_OP_ADD:
         case GGML_OP_ADD1: // TODO: more efficient implementation
-            ggml_sycl_add(ctx, dst);
+            ggml_sycl_add(ctx, dst); // partially done
             break;
         case GGML_OP_SUB:
-            ggml_sycl_sub(ctx, dst);
+            ggml_sycl_sub(ctx, dst); // partially done
             break;
         case GGML_OP_ACC:
-            ggml_sycl_acc(ctx, dst);
+            ggml_sycl_acc(ctx, dst); // fully done
             break;
         case GGML_OP_MUL:
-            ggml_sycl_mul(ctx, dst);
+            ggml_sycl_mul(ctx, dst); // partially done
             break;
         case GGML_OP_LOG:
-            ggml_sycl_log(ctx, dst);
+            ggml_sycl_log(ctx, dst); // fully done
             break;
         case GGML_OP_DIV:
-            ggml_sycl_div(ctx, dst);
+            ggml_sycl_div(ctx, dst); // partially done
             break;
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(dst)) {
                 case GGML_UNARY_OP_NEG:
-                    ggml_sycl_neg(ctx, dst);
+                    ggml_sycl_neg(ctx, dst); // done
                     break;
                 case GGML_UNARY_OP_STEP:
-                    ggml_sycl_step(ctx, dst);
+                    ggml_sycl_step(ctx, dst); // done
                     break;
                 case GGML_UNARY_OP_GELU:
-                    ggml_sycl_gelu(ctx, dst);
+                    ggml_sycl_gelu(ctx, dst); // done
                     break;
                 case GGML_UNARY_OP_SILU:
-                    ggml_sycl_silu(ctx, dst);
+                    ggml_sycl_silu(ctx, dst); // done
                     break;
                 case GGML_UNARY_OP_GELU_QUICK:
-                    ggml_sycl_gelu_quick(ctx, dst);
+                    ggml_sycl_gelu_quick(ctx, dst); // done
                     break;
                 case GGML_UNARY_OP_TANH:
-                    ggml_sycl_tanh(ctx, dst);
+                    ggml_sycl_tanh(ctx, dst); // done
                     break;
                 case GGML_UNARY_OP_RELU:
-                    ggml_sycl_relu(ctx, dst);
+                    ggml_sycl_relu(ctx, dst); // done
                     break;
                 case GGML_UNARY_OP_SIGMOID:
-                    ggml_sycl_sigmoid(ctx, dst);
+                    ggml_sycl_sigmoid(ctx, dst); // done
                     break;
                 case GGML_UNARY_OP_HARDSIGMOID:
-                    ggml_sycl_hardsigmoid(ctx, dst);
+                    ggml_sycl_hardsigmoid(ctx, dst); // done
                     break;
                 case GGML_UNARY_OP_HARDSWISH:
-                    ggml_sycl_hardswish(ctx, dst);
+                    ggml_sycl_hardswish(ctx, dst); // done
                     break;
                 case GGML_UNARY_OP_EXP:
-                    ggml_sycl_exp(ctx, dst);
+                    ggml_sycl_exp(ctx, dst); // done
                     break;
                 default:
                     return false;
             }
             break;
         case GGML_OP_NORM:
-            ggml_sycl_norm(ctx, dst);
+            ggml_sycl_norm(ctx, dst); // done
             break;
         case GGML_OP_GROUP_NORM:
-            ggml_sycl_group_norm(ctx, dst);
+            ggml_sycl_group_norm(ctx, dst); // done
             break;
         case GGML_OP_CONCAT:
-            ggml_sycl_op_concat(ctx, dst);
+            ggml_sycl_op_concat(ctx, dst); // already good
             break;
         case GGML_OP_UPSCALE:
-            ggml_sycl_upscale(ctx, dst);
+            ggml_sycl_upscale(ctx, dst); // done
             break;
         case GGML_OP_PAD:
-            ggml_sycl_pad(ctx, dst);
+            ggml_sycl_pad(ctx, dst); // done
             break;
         case GGML_OP_LEAKY_RELU:
-            ggml_sycl_leaky_relu(ctx, dst);
+            ggml_sycl_leaky_relu(ctx, dst); // done
             break;
         case GGML_OP_RMS_NORM:
-            ggml_sycl_rms_norm(ctx, dst);
+            ggml_sycl_rms_norm(ctx, dst); // done
             break;
         case GGML_OP_MUL_MAT:
             if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
                 return false;
             }
             /* ggml_sycl_mul_mat_id is dependent on ggml_sycl_mul_mat */
-            ggml_sycl_mul_mat(ctx, dst->src[0], dst->src[1], dst);
+            ggml_sycl_mul_mat(ctx, dst->src[0], dst->src[1], dst); // good
             break;
         case GGML_OP_MUL_MAT_ID:
             if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
                 return false;
             }
-            ggml_sycl_mul_mat_id(ctx, dst);
+            ggml_sycl_mul_mat_id(ctx, dst); // good
             break;
         case GGML_OP_OUT_PROD:
-            ggml_sycl_op_out_prod(ctx, dst);
+            ggml_sycl_op_out_prod(ctx, dst); // good
             break;
         case GGML_OP_SCALE:
-            ggml_sycl_scale(ctx, dst);
+            ggml_sycl_scale(ctx, dst); // done
             break;
         case GGML_OP_SQR:
-            ggml_sycl_sqr(ctx, dst);
+            ggml_sycl_sqr(ctx, dst); // done
             break;
         case GGML_OP_SQRT:
-            ggml_sycl_sqrt(ctx, dst);
+            ggml_sycl_sqrt(ctx, dst); // done
             break;
         case GGML_OP_SIN:
-            ggml_sycl_sin(ctx, dst);
+            ggml_sycl_sin(ctx, dst); // done
             break;
         case GGML_OP_COS:
-            ggml_sycl_cos(ctx, dst);
+            ggml_sycl_cos(ctx, dst); // done
             break;
         case GGML_OP_CLAMP:
-            ggml_sycl_clamp(ctx, dst);
+            ggml_sycl_clamp(ctx, dst); // done
             break;
         case GGML_OP_CPY:
-            ggml_sycl_cpy(ctx, dst->src[0], dst->src[1], dst);
+            ggml_sycl_cpy(ctx, dst->src[0], dst->src[1]); // okayish, need check
             break;
         case GGML_OP_CONT:
-            ggml_sycl_dup(ctx, dst);
+            ggml_sycl_dup(ctx, dst); // done
             break;
         case GGML_OP_NONE:
         case GGML_OP_RESHAPE:
@@ -4083,34 +4013,34 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
             GGML_SYCL_DEBUG("%s: Tensor NO-OP\n", __func__);
             break;
         case GGML_OP_DIAG_MASK_INF:
-            ggml_sycl_diag_mask_inf(ctx, dst);
+            ggml_sycl_diag_mask_inf(ctx, dst); // done
             break;
         case GGML_OP_SOFT_MAX:
-            ggml_sycl_op_soft_max(ctx, dst);
+            ggml_sycl_op_soft_max(ctx, dst); // already good
             break;
         case GGML_OP_ROPE:
-            ggml_sycl_rope(ctx, dst);
+            ggml_sycl_rope(ctx, dst); // done
             break;
         case GGML_OP_IM2COL:
-            ggml_sycl_im2col(ctx, dst);
+            ggml_sycl_im2col(ctx, dst); // done
             break;
         case GGML_OP_POOL_2D:
-            ggml_sycl_pool2d(ctx, dst);
+            ggml_sycl_pool2d(ctx, dst); // done
             break;
         case GGML_OP_SUM:
-            ggml_sycl_sum(ctx, dst);
+            ggml_sycl_sum(ctx, dst); // done
             break;
         case GGML_OP_SUM_ROWS:
-            ggml_sycl_sum_rows(ctx, dst);
+            ggml_sycl_sum_rows(ctx, dst); // done
             break;
         case GGML_OP_ARGSORT:
-            ggml_sycl_argsort(ctx, dst);
+            ggml_sycl_argsort(ctx, dst); // done
             break;
         case GGML_OP_TIMESTEP_EMBEDDING:
-            ggml_sycl_op_timestep_embedding(ctx, dst);
+            ggml_sycl_op_timestep_embedding(ctx, dst); // already pretty good
             break;
         case GGML_OP_RWKV_WKV6:
-            ggml_sycl_op_rwkv_wkv6(ctx, dst);
+            ggml_sycl_op_rwkv_wkv6(ctx, dst); // good
             break;
         case GGML_OP_GATED_LINEAR_ATTN:
             ggml_sycl_op_gated_linear_attn(ctx, dst);
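The dispatcher's shape is unchanged; only the calls inside it are. For context, a rough sketch of how the backend drives it once per graph node (a hypothetical caller condensed from the real graph-compute loop; the names `sycl_ctx` and `cgraph` are assumptions):

    // iterate the compute graph and dispatch each node through the switch above
    for (int i = 0; i < cgraph->n_nodes; ++i) {
        ggml_tensor * node = cgraph->nodes[i];
        if (ggml_is_empty(node)) {
            continue; // nothing to compute
        }
        if (!ggml_sycl_compute_forward(*sycl_ctx, node)) {
            GGML_LOG_ERROR("%s: unsupported op %s\n", __func__, ggml_op_name(node->op));
        }
    }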
View file
@@ -82,13 +82,10 @@ static void im2col_sycl(
     }
 }

-void ggml_sycl_op_im2col(
-    ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
-    ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd,
-    const queue_ptr &main_stream) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F16);
-    GGML_ASSERT(src1->type == GGML_TYPE_F32);
+void ggml_sycl_op_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F16);
+    GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32);

     const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
@@ -100,27 +97,28 @@ void ggml_sycl_op_im2col(
     const bool is_2D = ((const int32_t*)(dst->op_params))[6] == 1;

-    const int64_t IC = src1->ne[is_2D ? 2 : 1];
-    const int64_t IH = is_2D ? src1->ne[1] : 1;
-    const int64_t IW = src1->ne[0];
+    const int64_t IC = dst->src[1]->ne[is_2D ? 2 : 1];
+    const int64_t IH = is_2D ? dst->src[1]->ne[1] : 1;
+    const int64_t IW = dst->src[1]->ne[0];

-    const int64_t KH = is_2D ? src0->ne[1] : 1;
-    const int64_t KW = src0->ne[0];
+    const int64_t KH = is_2D ? dst->src[0]->ne[1] : 1;
+    const int64_t KW = dst->src[0]->ne[0];

     const int64_t OH = is_2D ? dst->ne[2] : 1;
     const int64_t OW = dst->ne[1];

-    const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
-    const int64_t batch = src1->ne[3];
-    const size_t batch_offset = src1->nb[3] / 4; // nb is byte offset, src is type float32
+    const size_t delta_offset = dst->src[1]->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
+    const int64_t batch = dst->src[1]->ne[3];
+    const size_t batch_offset = dst->src[1]->nb[3] / 4; // nb is byte offset, src is type float32
+
+    dpct::queue_ptr main_stream = ctx.stream();

     if (dst->type == GGML_TYPE_F16) {
-        im2col_sycl(src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
+        const float * src1_dd = static_cast<const float *>(dst->src[1]->data);
+        sycl::half * dst_dd = static_cast<sycl::half *>(dst->data);
+        im2col_sycl(src1_dd, dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
     } else {
-        im2col_sycl(src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
+        const float * src1_dd = static_cast<const float *>(dst->src[1]->data);
+        float * dst_dd = static_cast<float *>(dst->data);
+        im2col_sycl(src1_dd, dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
     }
-    GGML_UNUSED(src0);
-    GGML_UNUSED(src0_dd);
-    GGML_UNUSED(ctx);
 }
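As the indices above suggest, im2col packs its hyper-parameters into `dst->op_params` as seven int32 slots. A sketch of the full unpacking, assuming the standard ggml ordering (only slots 0 and 6 are visible in this hunk; the middle slots are inferred):

    const int32_t * params = (const int32_t *) dst->op_params;
    const int32_t s0 = params[0], s1 = params[1];   // stride
    const int32_t p0 = params[2], p1 = params[3];   // padding
    const int32_t d0 = params[4], d1 = params[5];   // dilation
    const bool is_2D = params[6] == 1;              // 1D vs 2D mode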
View file
@@ -15,9 +15,6 @@

 #include "common.hpp"

-void ggml_sycl_op_im2col(
-    ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
-    ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd,
-    const queue_ptr &main_stream);
+void ggml_sycl_op_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

 #endif // GGML_SYCL_IM2COL_HPP
View file
@@ -311,34 +311,27 @@ static void rms_norm_f32_sycl(const float* x, float* dst, const int ncols,
     }
 }

-void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, const ggml_tensor* src1,
-    ggml_tensor* dst, const float* src0_dd,
-    const float* src1_dd, float* dst_dd,
-    const queue_ptr& main_stream) {
+void ggml_sycl_op_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {

-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

-    const int64_t ne00 = src0->ne[0];
-    const int64_t nrows = ggml_nrows(src0);
+    const int64_t ne00 = dst->src[0]->ne[0];
+    const int64_t nrows = ggml_nrows(dst->src[0]);

     float eps;
     memcpy(&eps, dst->op_params, sizeof(float));

-    norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
-
-    (void)src1;
-    (void)dst;
-    (void)src1_dd;
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
+
+    norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
 }

-void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
-    const ggml_tensor* src1, ggml_tensor* dst,
-    const float* src0_dd, const float* src1_dd,
-    float* dst_dd,
-    const queue_ptr& main_stream) {
+void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {

-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     int num_groups = dst->op_params[0];
@@ -346,33 +339,26 @@ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor*

     float eps;
     memcpy(&eps, dst->op_params + 1, sizeof(float));

-    int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups);
-    group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device);
-
-    (void)src1;
-    (void)dst;
-    (void)src1_dd;
-    GGML_UNUSED(ctx);
+    int group_size = dst->src[0]->ne[0] * dst->src[0]->ne[1] * ((dst->src[0]->ne[2] + num_groups - 1) / num_groups);
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
+    group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, dst->src[0]->ne[0] * dst->src[0]->ne[1] * dst->src[0]->ne[2], main_stream, ctx.device);
 }

-void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
-    const ggml_tensor* src1, ggml_tensor* dst,
-    const float* src0_dd, const float* src1_dd,
-    float* dst_dd,
-    const queue_ptr& main_stream) {
+void ggml_sycl_op_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {

-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

-    const int64_t ne00 = src0->ne[0];
-    const int64_t nrows = ggml_nrows(src0);
+    const int64_t ne00 = dst->src[0]->ne[0];
+    const int64_t nrows = ggml_nrows(dst->src[0]);

     float eps;
     memcpy(&eps, dst->op_params, sizeof(float));

+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
+    dpct::queue_ptr main_stream = ctx.stream();
+
     rms_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
-
-    (void)src1;
-    (void)dst;
-    (void)src1_dd;
 }
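One detail worth keeping in mind in the norm ops above: `op_params` is a raw `int32_t` array, so the float `eps` is recovered with `memcpy` rather than a pointer dereference; a cast like `*(const float *) dst->op_params` would be a strict-aliasing violation. In miniature:

    float eps;
    memcpy(&eps, dst->op_params, sizeof(float));                       // norm / rms_norm: slot 0
    // group_norm stores num_groups in slot 0 and eps in slot 1:
    memcpy(&eps, (const int32_t *) dst->op_params + 1, sizeof(float));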
View file
@@ -15,21 +15,10 @@

 #include "common.hpp"

-void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, const ggml_tensor* src1,
-    ggml_tensor* dst, const float* src0_dd,
-    const float* src1_dd, float* dst_dd,
-    const queue_ptr& main_stream);
+void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, ggml_tensor * dst);

-void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
-    const ggml_tensor* src1, ggml_tensor* dst,
-    const float* src0_dd, const float* src1_dd,
-    float* dst_dd,
-    const queue_ptr& main_stream);
+void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, ggml_tensor * dst);

-void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
-    const ggml_tensor* src1, ggml_tensor* dst,
-    const float* src0_dd, const float* src1_dd,
-    float* dst_dd,
-    const queue_ptr& main_stream);
+void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor * dst);

 #endif // GGML_SYCL_NORM_HPP
View file
@@ -192,18 +192,15 @@ static void rope_neox_sycl(
     }
 }

-void ggml_sycl_op_rope(
-    ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
-    const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
-    const ggml_tensor * src2 = dst->src[2];
+void ggml_sycl_op_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {

-    GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
-    GGML_ASSERT(src0->type == dst->type);
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
+    GGML_ASSERT(dst->src[0]->type == dst->type);

-    const int64_t ne00 = src0->ne[0];
-    const int64_t ne01 = src0->ne[1];
-    const int64_t nr = ggml_nrows(src0);
+    const int64_t ne00 = dst->src[0]->ne[0];
+    const int64_t ne01 = dst->src[0]->ne[1];
+    const int64_t nr = ggml_nrows(dst->src[0]);

     //const int n_past = ((int32_t *) dst->op_params)[0];
     const int n_dims = ((int32_t *) dst->op_params)[1];
@@ -228,49 +225,49 @@ void ggml_sycl_op_rope(
     const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

-    const int32_t * pos = (const int32_t *) src1_dd;
+    const int32_t * pos = static_cast<const int32_t *>(dst->src[1]->data);

     const float * freq_factors = nullptr;
-    if (src2 != nullptr) {
-        freq_factors = (const float *) src2->data;
+    if (dst->src[2] != nullptr) {
+        freq_factors = static_cast<const float *>(dst->src[2]->data);
     }

     rope_corr_dims corr_dims;
     ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims.v);

+    dpct::queue_ptr main_stream = ctx.stream();
+
     // compute
     if (is_neox) {
-        if (src0->type == GGML_TYPE_F32) {
-            rope_neox_sycl(
-                (const float *)src0_dd, (float *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
-                attn_factor, corr_dims, freq_factors, main_stream
-            );
-        } else if (src0->type == GGML_TYPE_F16) {
-            rope_neox_sycl(
-                (const sycl::half *)src0_dd, (sycl::half *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
-                attn_factor, corr_dims, freq_factors, main_stream
-            );
+        if (dst->src[0]->type == GGML_TYPE_F32) {
+            const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+            float * dst_dd = static_cast<float *>(dst->data);
+            rope_neox_sycl(src0_dd, dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01,
+                           freq_base, ext_factor, attn_factor, corr_dims, freq_factors, main_stream);
+        } else if (dst->src[0]->type == GGML_TYPE_F16) {
+            const sycl::half * src0_dd = static_cast<const sycl::half *>(dst->src[0]->data);
+            sycl::half * dst_dd = static_cast<sycl::half *>(dst->data);
+            rope_neox_sycl(src0_dd, dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01,
+                           freq_base, ext_factor, attn_factor, corr_dims, freq_factors, main_stream);
         } else {
             GGML_ABORT("fatal error");
         }
     } else {
-        if (src0->type == GGML_TYPE_F32) {
+        if (dst->src[0]->type == GGML_TYPE_F32) {
+            const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+            float * dst_dd = static_cast<float *>(dst->data);
             rope_norm_sycl(
-                (const float *)src0_dd, (float *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
+                src0_dd, dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
                 attn_factor, corr_dims, freq_factors, main_stream
             );
-        } else if (src0->type == GGML_TYPE_F16) {
+        } else if (dst->src[0]->type == GGML_TYPE_F16) {
+            const sycl::half * src0_dd = static_cast<const sycl::half *>(dst->src[0]->data);
+            sycl::half * dst_dd = static_cast<sycl::half *>(dst->data);
             rope_norm_sycl(
-                (const sycl::half *)src0_dd, (sycl::half *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
+                src0_dd, dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
                 attn_factor, corr_dims, freq_factors, main_stream
             );
         } else {
             GGML_ABORT("fatal error");
         }
     }
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
 }
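For reference, the operand roles `ggml_sycl_op_rope` now reads directly off `dst` (a recap of the code above, not new behaviour):

    //   dst->src[0] : activations, F32 or F16
    //   dst->src[1] : token positions, I32
    //   dst->src[2] : optional per-channel frequency factors, F32 (may be null)
    const int32_t * pos          = static_cast<const int32_t *>(dst->src[1]->data);
    const float   * freq_factors = dst->src[2] != nullptr
                                 ? static_cast<const float *>(dst->src[2]->data)
                                 : nullptr;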
View file
@@ -15,8 +15,6 @@

 #include "common.hpp"

-void ggml_sycl_op_rope(
-    ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
-    const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream);
+void ggml_sycl_op_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

 #endif // GGML_SYCL_ROPE_HPP
View file
@@ -57,9 +57,8 @@ static void timestep_embedding_f32_sycl(
 void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     const ggml_tensor *src0 = dst->src[0];
-    const ggml_tensor *src1 = dst->src[1];
-    const float * src0_d = (const float *)src0->data;
-    float * dst_d = (float *)dst->data;
+    const float * src0_d = static_cast<const float *>(src0->data);
+    float * dst_d = static_cast<float *>(dst->data);
     dpct::queue_ptr stream = ctx.stream();

     GGML_ASSERT(src0->type == GGML_TYPE_F32);
@@ -69,5 +68,4 @@ void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_tenso
     const int max_period = dst->op_params[1];

     timestep_embedding_f32_sycl(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream);
-
-    GGML_UNUSED(src1);
 }
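The timestep-embedding op reads its two scalars straight from `op_params`; a sketch, assuming `dim` sits in slot 0 (only the `max_period` read from slot 1 is visible in this hunk):

    const int dim        = dst->op_params[0];  // embedding dimension (assumed slot)
    const int max_period = dst->op_params[1];  // as shown above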
View file
@@ -97,9 +97,6 @@ static void rwkv_wkv_f32_kernel(
 void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
-    const ggml_tensor *src0 = dst->src[0];
-    const ggml_tensor *src1 = dst->src[1];
-
     const float* k_d = (const float*)dst->src[0]->data;
     const float* v_d = (const float*)dst->src[1]->data;
     const float* r_d = (const float*)dst->src[2]->data;
@@ -138,6 +135,4 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
         });
     });
-
-    GGML_UNUSED(src0);
-    GGML_UNUSED(src1);
 }
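A sketch of the full operand layout for GGML_OP_RWKV_WKV6, for readers following the `dst->src[...]` indexing. Only the first three reads are visible in this hunk; slots 3 to 5 are assumptions based on how ggml constructs the rwkv_wkv6 node:

    const float * k_d  = (const float *) dst->src[0]->data;  // keys
    const float * v_d  = (const float *) dst->src[1]->data;  // values
    const float * r_d  = (const float *) dst->src[2]->data;  // receptance
    const float * tf_d = (const float *) dst->src[3]->data;  // time-first tensor (assumed slot)
    const float * td_d = (const float *) dst->src[4]->data;  // time-decay tensor (assumed slot)
    const float * s_d  = (const float *) dst->src[5]->data;  // recurrent state (assumed slot)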