diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 0674bd3c9..4c37f4278 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -229,6 +229,7 @@ template static } // sum up partial sums and write back result + __syncthreads(); for (int s=block_size/2; s>0; s>>=1) { if (tid < s) { tmp[tid] += tmp[tid + s];