From 7dda9aad239c2313586f34e7eeec20fef33d2594 Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Wed, 11 Dec 2024 08:54:19 +0530 Subject: [PATCH] SYCL: remove the unused variables instead of commenting them out --- ggml/src/ggml-sycl/common.cpp | 10 ---------- ggml/src/ggml-sycl/ggml-sycl.cpp | 9 +++------ ggml/src/ggml-sycl/mmvq.cpp | 22 ++++++---------------- 3 files changed, 9 insertions(+), 32 deletions(-) diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml/src/ggml-sycl/common.cpp index 0738486bb..a9ee40491 100644 --- a/ggml/src/ggml-sycl/common.cpp +++ b/ggml/src/ggml-sycl/common.cpp @@ -64,21 +64,11 @@ void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *sr const ggml_tensor *src1, ggml_tensor *dst, const ggml_sycl_op_flatten_t op) try { - // TODO: What's the use of these? - // const int64_t nrows0 = ggml_nrows(src0); - // const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1; - const bool use_src1 = src1 != nullptr; GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT); GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT); - // TODO: What are these uses of these? - - // ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra; - // ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr; - // ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra; - // dd = data device float * src0_ddf = (float *) src0->data; float * src1_ddf = use_src1 ? 
(float *) src1->data : nullptr; diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index a76b90dcd..0cea15ca4 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -2873,8 +2873,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra; ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra; - // TODO: What's the use of this? - // ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra; + const bool src0_is_contiguous = ggml_is_contiguous(src0); const bool src1_is_contiguous = ggml_is_contiguous(src1); @@ -3300,8 +3299,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_TENSOR_BINARY_OP_LOCALS - // TODO: What's the use of this? - //const int64_t ne_dst = ggml_nelements(dst); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); queue_ptr main_stream = ctx.stream();; @@ -4234,8 +4232,7 @@ catch (sycl::exception const &exc) } static void ggml_backend_sycl_event_wait(ggml_backend_t backend, ggml_backend_event_t event) try { - // TODO: sycl_ctx is unused here - // ggml_backend_sycl_context* sycl_ctx = static_cast(backend->context); + sycl::event* sycl_event = static_cast(event->context); if (ggml_backend_is_sycl(backend)) { diff --git a/ggml/src/ggml-sycl/mmvq.cpp b/ggml/src/ggml-sycl/mmvq.cpp index dfc821ab9..2da0c2507 100644 --- a/ggml/src/ggml-sycl/mmvq.cpp +++ b/ggml/src/ggml-sycl/mmvq.cpp @@ -754,9 +754,7 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, QK_WARP_SIZE); { stream->submit([&](sycl::handler & cgh) { - //TODO: What's the purpose of these? 
- //auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0]; - //auto ksigns64_ptr_ct1 = &ksigns64[0]; + cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -780,9 +778,7 @@ static void mul_mat_vec_iq2_s_q8_1_sycl(const void *vx, const void *vy, { stream->submit([&](sycl::handler &cgh) { - // TODO: What's the purpose of these? - // auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0]; - // auto ksigns64_ptr_ct1 = &ksigns64[0]; + cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -806,9 +802,7 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy, { stream->submit([&](sycl::handler &cgh) { - // TODO: What's the purpose of these? - // auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0]; - // auto ksigns64_ptr_ct1 = &ksigns64[0]; + cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -832,8 +826,7 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy, { stream->submit([&](sycl::handler &cgh) { - // TODO: What's the purpose of this? - // auto iq3s_grid_ptr_ct1 = &iq3s_grid[0]; + cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -857,9 +850,7 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy, { stream->submit([&](sycl::handler &cgh) { - // TODO: What's the purpose of these? - // auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0]; - // auto ksigns64_ptr_ct1 = &ksigns64[0]; + cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -958,8 +949,7 @@ void ggml_sycl_op_mul_mat_vec_q( const size_t q8_1_bs = QK8_1; // the main device has a larger memory buffer to hold the results from all GPUs // nrows_dst == nrows of the matrix that the kernel writes into - // TODO: nrows_dst is unused. Please check. - // const int64_t nrows_dst = id == ctx.device ? ne00 : row_diff; + for (int i = 0; i < src1_ncols; i++) { const size_t src1_ddq_i_offset = i * src1_padded_col_size * q8_1_ts / q8_1_bs;