Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
Georgi Gerganov
a95225cdfd
metal : another fix for the fa kernel 2024-08-26 15:08:38 +03:00

View file

@@ -2144,6 +2144,7 @@ kernel void kernel_flash_attn_ext_f16(
const short tx = tiisg%4;
const short ty = tiisg/4;
if (iq1 + ty < ne01) {
// mqk = mqk*scale
ss[8*cc + ty*TF + 2*tx + 0] *= scale;
ss[8*cc + ty*TF + 2*tx + 1] *= scale;
@@ -2160,6 +2161,7 @@ kernel void kernel_flash_attn_ext_f16(
}
}
}
}
// used to detect blocks full of -INF
float smax = -INFINITY;