diff --git a/ggml/src/ggml-metal.metal b/ggml/src/ggml-metal.metal index 1b32e4384..84e92e4fc 100644 --- a/ggml/src/ggml-metal.metal +++ b/ggml/src/ggml-metal.metal @@ -2144,7 +2144,7 @@ kernel void kernel_flash_attn_ext_f16( } // scale and apply the mask (assume C = 32) - for (short j = 0; j < Q && iq1 + j < ne01; ++j) { + for (short j = 0; j < Q; ++j) { // mqk = mqk*scale ss[j*TF + tiisg] *= scale;