Try to reduce some unused-variable and typecast warnings

Akarshan Biswas 2024-12-09 13:45:16 +05:30
parent 43ed389a3f
commit 3930184d14
No known key found for this signature in database
GPG key ID: 52A578A14B32134D
11 changed files with 64 additions and 23 deletions
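
Most of the hunks below apply the classic (void) cast idiom: evaluating a name and discarding the result tells the compiler it is intentionally unused, which silences -Wunused-parameter / -Wunused-variable diagnostics without changing behavior. A minimal standalone sketch of the idiom, using stand-in types rather than the real ggml ones:

    struct context { };   // stand-in for ggml_backend_sycl_context
    struct tensor  { };   // stand-in for ggml_tensor

    void op_stub(context & ctx, const tensor * src1, tensor * dst) {
        // Real work would use only some of the arguments; the casts
        // mark the rest as deliberately ignored.
        (void) ctx;
        (void) src1;
        (void) dst;
    }

Built with warnings enabled (for example g++ -Wall -Wextra -c), the version without the casts warns once per unused parameter; with them it compiles silently.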

View file

@@ -626,6 +626,7 @@ struct bin_bcast_sycl {
});
}
}
+ (void) ctx;
}
};

View file

@@ -47,7 +47,7 @@ static void concat_f32_dim1(const float *x, const float *y, float *dst,
// operation
int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
- if (item_ct1.get_group(1) < ne01) { // src0
+ if (item_ct1.get_group(1) < (size_t) ne01) { // src0
int offset_src =
nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * ne01;
dst[offset_dst] = x[offset_src];
@@ -70,7 +70,7 @@ static void concat_f32_dim2(const float *x, const float *y, float *dst,
// operation
int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
- if (item_ct1.get_group(0) < ne02) { // src0
+ if (item_ct1.get_group(0) < (size_t) ne02) { // src0
int offset_src = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
dst[offset_dst] = x[offset_src];
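
The (size_t) casts above fix -Wsign-compare: SYCL's nd_item::get_group() returns an unsigned size_t, while ne01/ne02 are signed ints, so the comparison mixes signedness and the compiler flags it. A small sketch of the same warning without SYCL (the names here are illustrative):

    #include <cstddef>

    bool group_in_src0(std::size_t group, int ne01) {
        // return group < ne01;             // -Wsign-compare: different signedness
        return group < (std::size_t) ne01;  // fine, assuming ne01 >= 0
    }

The cast is safe here only because tensor extents are non-negative; casting a genuinely negative int to size_t would wrap to a huge value and make the comparison silently wrong.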

View file

@@ -1237,7 +1237,7 @@ namespace dpct
std::map<byte_t *, allocation>::iterator get_map_iterator(const void *ptr)
{
- auto it = m_map.upper_bound((byte_t *)ptr);
+ auto it = m_map.upper_bound(const_cast<byte_t *>(reinterpret_cast<const byte_t *>(ptr)));
if (it == m_map.end())
{
// Not a virtual pointer.
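
The old C-style cast (byte_t *)ptr performed two conversions at once: it changed the pointee type and dropped the const qualifier. The replacement spells both out, which is the direction stricter warning sets (-Wold-style-cast, -Wcast-qual) push toward. A sketch of the same transformation, assuming byte_t is a byte-sized typedef as in dpct's helper:

    using byte_t = unsigned char;  // assumed stand-in for dpct's byte typedef

    byte_t * as_bytes(const void * ptr) {
        // (byte_t *) ptr;  // C-style: retypes and strips const in one opaque step
        return const_cast<byte_t *>(reinterpret_cast<const byte_t *>(ptr));
    }

This stays well-defined only as long as the result is never used to write through to an object that was originally declared const.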

View file

@@ -237,7 +237,7 @@ void upscale_f32(const float *x, float *dst, const int nb00, const int nb01,
int i02 = i12 / sf2;
int i03 = i13 / sf3;
- dst[index] = *(float *)((char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
+ dst[index] = *(float *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
}
void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const int ne01, const int ne02,
@@ -523,6 +523,7 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -538,6 +539,7 @@ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
const ggml_tensor *src1, ggml_tensor *dst,
@@ -553,6 +555,7 @@ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -567,6 +570,7 @@ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -582,6 +586,7 @@ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -598,6 +603,7 @@ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -613,6 +619,7 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_t
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -628,6 +635,7 @@ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -643,6 +651,7 @@ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -658,6 +667,7 @@ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_ten
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -673,6 +683,7 @@ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -688,6 +699,7 @@ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -703,6 +715,7 @@ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -718,6 +731,7 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -733,6 +747,7 @@ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -752,6 +767,7 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -767,6 +783,7 @@ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -790,6 +807,7 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_ten
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -808,6 +826,7 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -828,6 +847,7 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor
acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream);
(void) dst;
+ (void) ctx;
}
inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
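
An alternative to these long (void) lists is C++17's [[maybe_unused]] attribute on the parameters themselves. The commit does not do this; the sketch below just shows the equivalent modern spelling, with the same stand-in types as above:

    struct context { };
    struct tensor  { };

    void op_stub([[maybe_unused]] context & ctx,
                 [[maybe_unused]] const tensor * src1,
                 [[maybe_unused]] tensor * dst) {
        // No (void) casts needed: the attribute records the intent
        // in the signature instead of the body.
    }

Both spellings generate identical code; the (void) form used in the diff keeps the declarations untouched and only edits function bodies.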

View file

@@ -2640,6 +2640,7 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
(void) src1;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -2657,6 +2658,7 @@ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -2676,6 +2678,7 @@ inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_te
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -2697,6 +2700,7 @@ inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_ten
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -2716,6 +2720,7 @@ inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tens
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -2738,6 +2743,7 @@ inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const gg
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -2761,6 +2767,7 @@ inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tenso
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -2786,6 +2793,7 @@ inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tenso
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}
static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) {

View file

@@ -122,4 +122,5 @@ void ggml_sycl_op_im2col(
(void) src0;
(void) src0_dd;
+ (void) ctx;
}

View file

@@ -753,10 +753,10 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, QK_WARP_SIZE);
{
- stream->submit([&](sycl::handler &cgh) {
- auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
- auto ksigns64_ptr_ct1 = &ksigns64[0];
+ stream->submit([&](sycl::handler & cgh) {
+ // TODO: What's the purpose of these?
+ // auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
+ // auto ksigns64_ptr_ct1 = &ksigns64[0];
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -780,8 +780,9 @@ static void mul_mat_vec_iq2_s_q8_1_sycl(const void *vx, const void *vy,
{
stream->submit([&](sycl::handler &cgh) {
- auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
- auto ksigns64_ptr_ct1 = &ksigns64[0];
+ // TODO: What's the purpose of these?
+ // auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
+ // auto ksigns64_ptr_ct1 = &ksigns64[0];
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -805,8 +806,9 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy,
{
stream->submit([&](sycl::handler &cgh) {
- auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
- auto ksigns64_ptr_ct1 = &ksigns64[0];
+ // TODO: What's the purpose of these?
+ // auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
+ // auto ksigns64_ptr_ct1 = &ksigns64[0];
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -830,7 +832,8 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy,
{
stream->submit([&](sycl::handler &cgh) {
- auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
+ // TODO: What's the purpose of this?
+ // auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -854,8 +857,9 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
{
stream->submit([&](sycl::handler &cgh) {
- auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
- auto ksigns64_ptr_ct1 = &ksigns64[0];
+ // TODO: What's the purpose of these?
+ // auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
+ // auto ksigns64_ptr_ct1 = &ksigns64[0];
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -954,7 +958,8 @@ void ggml_sycl_op_mul_mat_vec_q(
const size_t q8_1_bs = QK8_1;
// the main device has a larger memory buffer to hold the results from all GPUs
// nrows_dst == nrows of the matrix that the kernel writes into
- const int64_t nrows_dst = id == ctx.device ? ne00 : row_diff;
+ // TODO: nrows_dst is unused. Please check.
+ // const int64_t nrows_dst = id == ctx.device ? ne00 : row_diff;
for (int i = 0; i < src1_ncols; i++)
{
const size_t src1_ddq_i_offset = i * src1_padded_col_size * q8_1_ts / q8_1_bs;
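
The lines commented out in this file were locals that were initialized but never read, either inside the submitted lambda (the *_ptr_ct1 pointers left over from the DPCT conversion) or in plain host code (nrows_dst); both trigger -Wunused-variable. A tiny reproducer of the warning, not taken from the diff:

    int demo(const int * grid) {
        auto grid_ptr = &grid[0];  // warning: unused variable 'grid_ptr'
        return 0;                  // removing or commenting out the line,
                                   // as the diff does, silences it
    }

Commenting the declarations out behind a TODO rather than deleting them keeps a breadcrumb in case the DPCT-generated pointers (or nrows_dst) turn out to be needed after all.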

View file

@@ -352,6 +352,7 @@ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor*
(void)src1;
(void)dst;
(void)src1_dd;
+ (void) ctx;
}
void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,

View file

@@ -272,4 +272,5 @@ void ggml_sycl_op_rope(
(void) src1;
(void) dst;
(void) src1_dd;
+ (void) ctx;
}

View file

@@ -53,8 +53,9 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
if (block_size > WARP_SIZE) {
if (warp_id == 0) {
buf[lane_id] = -INFINITY;
- for (size_t i = 1; i < nreduce; i += 1)
+ for (size_t i = 1; i < (size_t) nreduce; i += 1) {
buf[lane_id + i * WARP_SIZE] = -INFINITY;
+ }
}
item_ct1.barrier(sycl::access::fence_space::local_space);
@@ -63,8 +64,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
}
item_ct1.barrier(sycl::access::fence_space::local_space);
max_val = buf[lane_id];
- for (size_t i = 1; i < nreduce; i += 1)
- {
+ for (size_t i = 1; i < (size_t) nreduce; i += 1) {
max_val = std::max(max_val, buf[lane_id + i * WARP_SIZE]);
}
max_val = warp_reduce_max(max_val, item_ct1);
@@ -89,8 +89,9 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
item_ct1.barrier(sycl::access::fence_space::local_space);
if (warp_id == 0) {
buf[lane_id] = 0.f;
- for (size_t i = 1; i < nreduce; i += 1)
+ for (size_t i = 1; i < (size_t) nreduce; i += 1) {
buf[lane_id + i * WARP_SIZE] = 0.f;
+ }
}
item_ct1.barrier(sycl::access::fence_space::local_space);
@@ -100,8 +101,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
item_ct1.barrier(sycl::access::fence_space::local_space);
tmp = buf[lane_id];
- for (size_t i = 1; i < nreduce; i += 1)
- {
+ for (size_t i = 1; i < (size_t) nreduce; i += 1) {
tmp += buf[lane_id + i * WARP_SIZE];
}
tmp = warp_reduce_sum(tmp, item_ct1);

View file

@@ -59,7 +59,8 @@ static void rwkv_wkv_f32_kernel(
float y = 0;
// Process in chunks of 4 for better vectorization
- sycl::float4 k4, r4, tf4, td4, s4, kv4;
+ // TODO: What's the purpose of kv4?
+ sycl::float4 k4, r4, tf4, td4, s4;
#pragma unroll
for (int j = 0; j < head_size; j += 4) {
// Load data in vec4 chunks
@@ -135,4 +136,7 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* s
);
});
});
+ // TODO: Why are src0 and src1 unused?
+ (void) src0;
+ (void) src1;
}
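
For reference, the diagnostics this commit chips away at come from a GCC/Clang-style warning set; the exact invocation below is an assumption for illustration, not taken from the project's CMake files:

    icpx -fsycl -Wall -Wextra -c ggml-sycl.cpp

With -Wall -Wextra, both GCC and Clang-based compilers (icpx included) enable -Wsign-compare and the -Wunused family, which is what the (void) casts, (size_t) casts, and commented-out locals above are quieting.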