diff --git a/ggml/src/ggml-sycl/gla.cpp b/ggml/src/ggml-sycl/gla.cpp index 8a83da3a9..eedb47486 100644 --- a/ggml/src/ggml-sycl/gla.cpp +++ b/ggml/src/ggml-sycl/gla.cpp @@ -30,9 +30,11 @@ static void gated_linear_attn_f32_kernel(const dpct::queue_ptr stream, u_int B, for (u_int i = 0; i < head_size; i++) { state[i] = s[batch_i * state_size + head_i * head_size * head_size + i * head_size + tid]; } - item.barrier(sycl::access::fence_space::local_space); //sync threads + for (u_int t = batch_i * n_seq_tokens * C + head_i * head_size + tid; t < (batch_i + 1) * n_seq_tokens * C + head_i * head_size + tid; t += C) { + + item.barrier(sycl::access::fence_space::local_space); //sync threads _k[tid] = k[t]; _r[tid] = r[t]; _td[tid] = td[t];