diff --git a/ggml-cuda/mmq.cuh b/ggml-cuda/mmq.cuh index 41fe2d814..62111f376 100644 --- a/ggml-cuda/mmq.cuh +++ b/ggml-cuda/mmq.cuh @@ -1344,7 +1344,7 @@ static __device__ __forceinline__ void mmq_write_back_mma(const float * __restri const int j = blockIdx.y*mmq_x + j0 + mma_C::get_j(l); if (j >= ne1) { - return; + continue; } const int i = blockIdx.x*mmq_y + i0 + mma_C::get_i(l);