metal : use F32 attention accumulators

Georgi Gerganov 2024-04-18 20:08:52 +03:00
parent fa9e8c6689
commit c16a7c2688
3 changed files with 81 additions and 93 deletions

ggml.c

@@ -14882,12 +14882,13 @@ static void ggml_compute_forward_flash_attn_ext(
         struct ggml_tensor * dst) {
     switch (dst->op_params[1]) {
         case GGML_PREC_DEFAULT:
+        case GGML_PREC_F32:
             {
+                // uses F32 accumulators
                 ggml_compute_forward_flash_attn_ext_f16(params, q, k, v, mask, dst);
             } break;
         default:
             {
-                // TODO: implement F32 precision
                 GGML_ASSERT(false);
             } break;
     }
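
The effect of the change: both GGML_PREC_DEFAULT and GGML_PREC_F32 now route to ggml_compute_forward_flash_attn_ext_f16, which, per the added comment, uses F32 accumulators. To see why this matters numerically, here is a minimal standalone C sketch (illustrative only, not part of this commit; the values are made up, and _Float16 needs a compiler/target with FP16 support, e.g. Clang on AArch64):

    // illustrative sketch, not part of this commit: why long attention
    // reductions want F32 accumulators even when the data is F16
    #include <stdio.h>

    int main(void) {
        _Float16 acc_f16 = (_Float16) 0.0f; // 11-bit significand
        float    acc_f32 = 0.0f;            // 24-bit significand

        const _Float16 x = (_Float16) 0.01f;

        // sum 100000 small terms, like a long attention row reduction
        for (int i = 0; i < 100000; ++i) {
            acc_f16 += x;         // stalls once x < 0.5 ulp of acc_f16
            acc_f32 += (float) x; // stays accurate
        }

        printf("f16 accumulator: %f\n", (double) acc_f16); // ~32, not ~1000
        printf("f32 accumulator: %f\n", (double) acc_f32); // ~1000
        return 0;
    }

On such a target the F16 accumulator plateaus near 32, where the 0.01 addend drops below half a ulp of the running total and every further add rounds away; the F32 accumulator reaches the expected ~1000. This drift is what the F32-accumulator path avoids.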