ggml : add ggml_flash_attn_ext API

This commit is contained in:
Georgi Gerganov 2024-01-18 17:42:55 +02:00
parent ad19812cda
commit a1c004ef2e
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
6 changed files with 456 additions and 38 deletions

9
ggml.h
View file

@@ -452,6 +452,7 @@ extern "C" {
GGML_OP_LEAKY_RELU,
GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_ATTN_EXT,
GGML_OP_FLASH_FF,
GGML_OP_FLASH_ATTN_BACK,
GGML_OP_WIN_PART,
@@ -1619,6 +1620,14 @@ extern "C" {
struct ggml_tensor * v,
bool masked);
// Public declaration of the extended flash-attention entry point.
//
// Parameters (as declared here):
//   ctx   - ggml context the call operates in
//   q     - query tensor
//   k     - key tensor
//   v     - value tensor
//   mask  - mask passed as a tensor (the declaration just above this one
//           ends with a `bool masked` flag instead)
//   scale - float scaling factor
//
// Returns a pointer to a struct ggml_tensor.
//
// NOTE(review): the names suggest the result is attention over q/k/v with
// `scale` applied to the scores and `mask` combined with them, but this
// declaration alone does not establish that - confirm against the
// implementation. Expected tensor shapes/dtypes, and whether `mask` may be
// NULL, are not visible here - TODO confirm and document.
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
struct ggml_context * ctx,
struct ggml_tensor * q,
struct ggml_tensor * k,
struct ggml_tensor * v,
struct ggml_tensor * mask,
float scale);
GGML_API struct ggml_tensor * ggml_flash_attn_back(
struct ggml_context * ctx,
struct ggml_tensor * q,