Try to reduce some unused and typecast warnings
parent 43ed389a3f
commit 3930184d14
11 changed files with 64 additions and 23 deletions
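The bulk of this commit is the classic `(void) name;` idiom: evaluating an otherwise-unused parameter as a discarded expression tells the compiler the omission is deliberate and silences -Wunused-parameter. A minimal standalone sketch of the pattern (the function and variable names below are illustrative, not taken from the ggml sources):

    // Illustrative sketch only: backend ops often share one signature even
    // when a particular op ignores some of its inputs.
    static void op_copy_like(const float * src0, const float * src1, float * dst) {
        dst[0] = src0[0];   // only src0 and dst are actually used

        (void) src1;        // deliberate: suppress -Wunused-parameter
    }

    int main() {
        float a = 1.0f, b = 2.0f, out = 0.0f;
        op_copy_like(&a, &b, &out);
        return out == 1.0f ? 0 : 1;
    }

In C++17 the [[maybe_unused]] attribute expresses the same intent declaratively; the cast form matches the style already present throughout these files.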
@@ -626,6 +626,7 @@ struct bin_bcast_sycl {
             });
         }
     }
+    (void) ctx;
 }
};

@@ -47,7 +47,7 @@ static void concat_f32_dim1(const float *x, const float *y, float *dst,
         // operation
         int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
                          item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
-        if (item_ct1.get_group(1) < ne01) { // src0
+        if (item_ct1.get_group(1) < (size_t) ne01) { // src0
             int offset_src =
                 nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * ne01;
             dst[offset_dst] = x[offset_src];

@@ -70,7 +70,7 @@ static void concat_f32_dim2(const float *x, const float *y, float *dst,
         // operation
         int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
                          item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
-        if (item_ct1.get_group(0) < ne02) { // src0
+        if (item_ct1.get_group(0) < (size_t) ne02) { // src0
            int offset_src = nidx + item_ct1.get_group(1) * ne0 +
                             item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
            dst[offset_dst] = x[offset_src];

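The two concat hunks above fix -Wsign-compare rather than unused warnings: item_ct1.get_group(...) returns an unsigned size_t, while ne01/ne02 are signed ints, and comparing the two directly draws a warning. A small standalone illustration of the same fix (the names here are made up for the example):

    #include <cstddef>

    // Illustrative sketch: an explicit cast documents the signed/unsigned
    // comparison. Assumes ne01 is non-negative, as tensor extents are.
    static bool in_bounds(std::size_t group_idx, int ne01) {
        return group_idx < (std::size_t) ne01;
    }

The cast is only safe because the extent cannot be negative; a negative ne01 would wrap to a huge unsigned value and make the bound check pass vacuously.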
@@ -1237,7 +1237,7 @@ namespace dpct

        std::map<byte_t *, allocation>::iterator get_map_iterator(const void *ptr)
        {
-            auto it = m_map.upper_bound((byte_t *)ptr);
+            auto it = m_map.upper_bound(const_cast<byte_t *>(reinterpret_cast<const byte_t *>(ptr)));
            if (it == m_map.end())
            {
                // Not a virtual pointer.

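The dpct change above replaces a C-style cast that silently stripped const from a const void *. Spelling it as reinterpret_cast followed by const_cast makes both conversions visible. A compilable sketch of the same chain, assuming a map keyed on a mutable byte_t * as in the dpct helper:

    #include <map>

    using byte_t = unsigned char;

    static std::map<byte_t *, int> m_map;  // stand-in for the allocation map

    // Illustrative sketch: convert const void * to the map's byte_t * key
    // type without a single C-style cast hiding the const removal.
    static auto find_after(const void *ptr) {
        return m_map.upper_bound(
            const_cast<byte_t *>(reinterpret_cast<const byte_t *>(ptr)));
    }

upper_bound never writes through the key, so dropping const here is safe in practice, even if it is something the map's signature forces rather than invites.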
@@ -237,7 +237,7 @@ void upscale_f32(const float *x, float *dst, const int nb00, const int nb01,
    int i02 = i12 / sf2;
    int i03 = i13 / sf3;

-    dst[index] = *(float *)((char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
+    dst[index] = *(float *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
}

void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const int ne01, const int ne02,

@@ -523,6 +523,7 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,

@@ -538,6 +539,7 @@ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}
inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                    const ggml_tensor *src1, ggml_tensor *dst,

@@ -553,6 +555,7 @@ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,

@@ -567,6 +570,7 @@ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,

@@ -582,6 +586,7 @@ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -598,6 +603,7 @@ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -613,6 +619,7 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_t
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -628,6 +635,7 @@ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -643,6 +651,7 @@ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -658,6 +667,7 @@ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_ten
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -673,6 +683,7 @@ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -688,6 +699,7 @@ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -703,6 +715,7 @@ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -718,6 +731,7 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -733,6 +747,7 @@ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -752,6 +767,7 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,

@@ -767,6 +783,7 @@ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -790,6 +807,7 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_ten
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,

@@ -808,6 +826,7 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,

@@ -828,6 +847,7 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor
    acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream);

    (void) dst;
+    (void) ctx;
}

inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,

@@ -2640,6 +2640,7 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens

    (void) src1;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -2657,6 +2658,7 @@ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -2676,6 +2678,7 @@ inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_te
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -2697,6 +2700,7 @@ inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_ten
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -2716,6 +2720,7 @@ inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tens
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,

@@ -2738,6 +2743,7 @@ inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const gg
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,

@@ -2761,6 +2767,7 @@ inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tenso
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,

@@ -2786,6 +2793,7 @@ inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tenso
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) {

@@ -122,4 +122,5 @@ void ggml_sycl_op_im2col(

    (void) src0;
    (void) src0_dd;
+    (void) ctx;
}

@@ -753,10 +753,10 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy,
    const sycl::range<3> block_nums(1, 1, block_num_y);
    const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, QK_WARP_SIZE);
    {
-        stream->submit([&](sycl::handler &cgh) {
-            auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
-            auto ksigns64_ptr_ct1 = &ksigns64[0];
+        stream->submit([&](sycl::handler & cgh) {
+            //TODO: What's the purpose of these?
+            //auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
+            //auto ksigns64_ptr_ct1 = &ksigns64[0];

            cgh.parallel_for(
                sycl::nd_range<3>(block_nums * block_dims, block_dims),

@@ -780,8 +780,9 @@ static void mul_mat_vec_iq2_s_q8_1_sycl(const void *vx, const void *vy,
    {
        stream->submit([&](sycl::handler &cgh) {
-            auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
-            auto ksigns64_ptr_ct1 = &ksigns64[0];
+            // TODO: What's the purpose of these?
+            // auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
+            // auto ksigns64_ptr_ct1 = &ksigns64[0];

            cgh.parallel_for(
                sycl::nd_range<3>(block_nums * block_dims, block_dims),

@@ -805,8 +806,9 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy,
    {
        stream->submit([&](sycl::handler &cgh) {
-            auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
-            auto ksigns64_ptr_ct1 = &ksigns64[0];
+            // TODO: What's the purpose of these?
+            // auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
+            // auto ksigns64_ptr_ct1 = &ksigns64[0];

            cgh.parallel_for(
                sycl::nd_range<3>(block_nums * block_dims, block_dims),

@@ -830,7 +832,8 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy,
    {
        stream->submit([&](sycl::handler &cgh) {
-            auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
+            // TODO: What's the purpose of this?
+            // auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];

            cgh.parallel_for(
                sycl::nd_range<3>(block_nums * block_dims, block_dims),

@@ -854,8 +857,9 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
    {
        stream->submit([&](sycl::handler &cgh) {
-            auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
-            auto ksigns64_ptr_ct1 = &ksigns64[0];
+            // TODO: What's the purpose of these?
+            // auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
+            // auto ksigns64_ptr_ct1 = &ksigns64[0];

            cgh.parallel_for(
                sycl::nd_range<3>(block_nums * block_dims, block_dims),

@@ -954,7 +958,8 @@ void ggml_sycl_op_mul_mat_vec_q(
    const size_t q8_1_bs = QK8_1;
    // the main device has a larger memory buffer to hold the results from all GPUs
    // nrows_dst == nrows of the matrix that the kernel writes into
-    const int64_t nrows_dst = id == ctx.device ? ne00 : row_diff;
+    // TODO: nrows_dst is unused. Please check.
+    // const int64_t nrows_dst = id == ctx.device ? ne00 : row_diff;
    for (int i = 0; i < src1_ncols; i++)
    {
        const size_t src1_ddq_i_offset = i * src1_padded_col_size * q8_1_ts / q8_1_bs;

@@ -352,6 +352,7 @@ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor*
    (void)src1;
    (void)dst;
    (void)src1_dd;
+    (void) ctx;
}

void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,

@@ -272,4 +272,5 @@ void ggml_sycl_op_rope(
    (void) src1;
    (void) dst;
    (void) src1_dd;
+    (void) ctx;
}

@@ -53,8 +53,9 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
    if (block_size > WARP_SIZE) {
        if (warp_id == 0) {
            buf[lane_id] = -INFINITY;
-            for (size_t i = 1; i < nreduce; i += 1)
+            for (size_t i = 1; i < (size_t) nreduce; i += 1) {
                buf[lane_id + i * WARP_SIZE] = -INFINITY;
+            }
        }
        item_ct1.barrier(sycl::access::fence_space::local_space);

@@ -63,8 +64,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
        }
        item_ct1.barrier(sycl::access::fence_space::local_space);
        max_val = buf[lane_id];
-        for (size_t i = 1; i < nreduce; i += 1)
-        {
+        for (size_t i = 1; i < (size_t) nreduce; i += 1) {
            max_val = std::max(max_val, buf[lane_id + i * WARP_SIZE]);
        }
        max_val = warp_reduce_max(max_val, item_ct1);

@@ -89,8 +89,9 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
        item_ct1.barrier(sycl::access::fence_space::local_space);
        if (warp_id == 0) {
            buf[lane_id] = 0.f;
-            for (size_t i = 1; i < nreduce; i += 1)
+            for (size_t i = 1; i < (size_t) nreduce; i += 1) {
                buf[lane_id + i * WARP_SIZE] = 0.f;
+            }
        }
        item_ct1.barrier(sycl::access::fence_space::local_space);

@@ -100,8 +101,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
        item_ct1.barrier(sycl::access::fence_space::local_space);

        tmp = buf[lane_id];
-        for (size_t i = 1; i < nreduce; i += 1)
-        {
+        for (size_t i = 1; i < (size_t) nreduce; i += 1) {
            tmp += buf[lane_id + i * WARP_SIZE];
        }
        tmp = warp_reduce_sum(tmp, item_ct1);

@@ -59,7 +59,8 @@ static void rwkv_wkv_f32_kernel(
        float y = 0;

        // Process in chunks of 4 for better vectorization
-        sycl::float4 k4, r4, tf4, td4, s4, kv4;
+        // TODO: What's the purpose of kv4?
+        sycl::float4 k4, r4, tf4, td4, s4;
#pragma unroll
        for (int j = 0; j < head_size; j += 4) {
            // Load data in vec4 chunks

@@ -135,4 +136,7 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* s
            );
        });
    });
+    // TODO: Why src0 and src1 are unused?
+    (void) src0;
+    (void) src1;
}