ggml : online attention (CPU)

This commit is contained in:
Georgi Gerganov 2024-01-20 12:26:49 +02:00
parent c3cdfffa88
commit a9681febd6
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
6 changed files with 231 additions and 198 deletions

5
ggml.h
View file

@ -1620,6 +1620,11 @@ extern "C" {
struct ggml_tensor * v,
bool masked);
// q:    [n_embd, n_batch, n_head,    1]
// k:    [n_embd, n_kv,    n_head_kv, 1]
// v:    [n_embd, n_kv,    n_head_kv, 1] !! not transposed !!
// mask: [n_kv,   n_batch, 1,         1]
// res:  [n_embd, n_head,  n_batch,   1] !! permuted !!
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
struct ggml_context * ctx,
struct ggml_tensor * q,