ggml : online attention (CPU)

This commit is contained in:
Georgi Gerganov 2024-01-20 12:26:49 +02:00
parent c3cdfffa88
commit a9681febd6
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
6 changed files with 231 additions and 198 deletions

View file

@@ -1981,7 +1981,8 @@ kernel void kernel_flash_attn_ext_f16(
constant uint64_t & nb1,
constant uint64_t & nb2,
constant uint64_t & nb3,
-constant float & scale,
+constant float & scale,
+threadgroup float * shared [[threadgroup(0)]],
uint3 tgpig[[threadgroup_position_in_grid]],
uint3 tpitg[[thread_position_in_threadgroup]],
uint3 ntg[[threads_per_threadgroup]]) {