From db3ded2c8a40550022be5fa9f734f1e288ccbf52 Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Mon, 13 Jan 2025 17:44:07 +0530 Subject: [PATCH] gla: Put the barrier inside the main logic loop --- ggml/src/ggml-sycl/gla.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-sycl/gla.cpp b/ggml/src/ggml-sycl/gla.cpp index 8a83da3a9..eedb47486 100644 --- a/ggml/src/ggml-sycl/gla.cpp +++ b/ggml/src/ggml-sycl/gla.cpp @@ -30,9 +30,11 @@ static void gated_linear_attn_f32_kernel(const dpct::queue_ptr stream, u_int B, for (u_int i = 0; i < head_size; i++) { state[i] = s[batch_i * state_size + head_i * head_size * head_size + i * head_size + tid]; } - item.barrier(sycl::access::fence_space::local_space); //sync threads + for (u_int t = batch_i * n_seq_tokens * C + head_i * head_size + tid; t < (batch_i + 1) * n_seq_tokens * C + head_i * head_size + tid; t += C) { + + item.barrier(sycl::access::fence_space::local_space); //sync threads _k[tid] = k[t]; _r[tid] = r[t]; _td[tid] = td[t];