From 28eaafc16617079ad439fdd4d5b020797976f028 Mon Sep 17 00:00:00 2001
From: "Meng, Hengyu" <hengyu.meng@intel.com>
Date: Sat, 15 Jun 2024 02:12:42 +0000
Subject: [PATCH] use macro for group_size and remove cuda-related

---
 ggml-sycl.cpp         | 14 ++++++--------
 ggml-sycl/presets.hpp |  2 ++
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index 4ff6751cd..6bd42b960 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -6220,7 +6220,7 @@ static void norm_f32_sycl(const float *x, float *dst, const int ncols,
             });
     } else {
         // FIXME: 1024 from cuda
-        const int work_group_size = 1024;
+        const int work_group_size = GROUP_SIZE;
         const sycl::range<3> block_dims(1, 1, work_group_size);
         /*
         DPCT1049:17: The work-group size passed to the SYCL kernel may exceed
@@ -6266,7 +6266,7 @@ static void group_norm_f32_sycl(const float *x, float *dst,
            });
        });
    } else {
-        const int work_group_size = 1024;
+        const int work_group_size = GROUP_SIZE;
        const sycl::range<3> block_dims(1, 1, work_group_size);
        /*
        DPCT1049:18: The work-group size passed to the SYCL kernel may exceed
@@ -6355,7 +6355,7 @@ static void rms_norm_f32_sycl(const float *x, float *dst, const int ncols,
            });
        });
    } else {
-        const int work_group_size = 1024;
+        const int work_group_size = GROUP_SIZE;
        const sycl::range<3> block_dims(1, 1, work_group_size);
        /*
        DPCT1049:19: The work-group size passed to the SYCL kernel may exceed
@@ -9115,8 +9115,6 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
    const sycl::range<3> block_nums(1, nrows, 1);
    const size_t shared_mem = ncols_pad * sizeof(int);

-    // GGML_ASSERT(shared_mem <= ggml_cuda_info().devices[ggml_cuda_get_device()].smpb);
-
    if (order == GGML_SORT_ORDER_ASC) {
        stream->submit([&](sycl::handler &cgh) {
            sycl::local_accessor<uint8_t, 1> dpct_local_acc_ct1(
@@ -9189,7 +9187,7 @@ static void soft_max_f32_sycl(const float * x, const float * mask,
                              const int nrows_y, const float scale, const float max_bias,
                              queue_ptr stream) {
    int nth = WARP_SIZE;
-    int max_block_size = 1024;
+    int max_block_size = GROUP_SIZE;
    while (nth < ncols_x && nth < max_block_size) nth *= 2;
    if (nth>max_block_size) nth = max_block_size;
@@ -9572,8 +9570,8 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {

std::unique_ptr<ggml_sycl_pool> ggml_backend_sycl_context::new_pool_for_device(queue_ptr qptr, int device) {
    // TBD: NO VMM support
-    // if (ggml_cuda_info().devices[device].vmm) {
-    //     return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
+    // if (ggml_sycl_info().devices[device].vmm) {
+    //     return std::unique_ptr<ggml_sycl_pool>(new ggml_sycl_pool_vmm(device));
    // }
    return std::unique_ptr<ggml_sycl_pool>(new ggml_sycl_pool_leg(qptr, device));
}
diff --git a/ggml-sycl/presets.hpp b/ggml-sycl/presets.hpp
index 5e6b61813..dcf026110 100644
--- a/ggml-sycl/presets.hpp
+++ b/ggml-sycl/presets.hpp
@@ -18,6 +18,8 @@
 #define GGML_SYCL_MAX_DEVICES 48
 #define GGML_SYCL_NAME "SYCL"

+// FIXME: 1024 from cuda
+#define GROUP_SIZE 1024
 #define WARP_SIZE 32
 #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
