From dd3f4085c699317d9461e707ed7135757d3c4b8b Mon Sep 17 00:00:00 2001 From: zhentaoyu Date: Tue, 30 Jul 2024 05:47:42 +0000 Subject: [PATCH] sycl: fix half_ceil in tsembd Signed-off-by: zhentaoyu --- ggml/src/ggml-sycl/tsembd.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-sycl/tsembd.cpp b/ggml/src/ggml-sycl/tsembd.cpp index 571c50347..d5c227cd1 100644 --- a/ggml/src/ggml-sycl/tsembd.cpp +++ b/ggml/src/ggml-sycl/tsembd.cpp @@ -40,7 +40,8 @@ static void timestep_embedding_f32( static void timestep_embedding_f32_sycl( const float * x, float * dst, const int ne00, const int nb1, const int dim, const int max_period, const queue_ptr& stream) { - int half_ceil = (dim + 1) / 2; + // The kernel returns when thread.idx is larger than dim/2, so half_ceil does not need to be padded + int half_ceil = dim / 2; int num_blocks = (half_ceil + SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE - 1) / SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE; sycl::range<3> block_dims(1, 1, SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE); sycl::range<3> gridDim(1, ne00, num_blocks);