metal : initial implementation
This commit is contained in:
parent
a9681febd6
commit
1173f49c3b
4 changed files with 180 additions and 33 deletions
2
ggml.c
2
ggml.c
|
@ -13419,8 +13419,8 @@ static void ggml_compute_forward_flash_attn_ext_f16(
|
|||
const int ik2 = iq2 / rk2;
|
||||
|
||||
// v indices
|
||||
const int iv2 = iq2 / rv2;
|
||||
const int iv3 = iq3 / rv3;
|
||||
const int iv2 = iq2 / rv2;
|
||||
|
||||
// online softmax / attention
|
||||
// loop over n_kv and n_head_kv
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue