ggml : add ggml_flash_attn_ext API
This commit is contained in:
parent
ad19812cda
commit
a1c004ef2e
6 changed files with 456 additions and 38 deletions
9
ggml.h
9
ggml.h
|
@ -452,6 +452,7 @@ extern "C" {
|
|||
GGML_OP_LEAKY_RELU,
|
||||
|
||||
GGML_OP_FLASH_ATTN,
|
||||
GGML_OP_FLASH_ATTN_EXT,
|
||||
GGML_OP_FLASH_FF,
|
||||
GGML_OP_FLASH_ATTN_BACK,
|
||||
GGML_OP_WIN_PART,
|
||||
|
@ -1619,6 +1620,14 @@ extern "C" {
|
|||
struct ggml_tensor * v,
|
||||
bool masked);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * q,
|
||||
struct ggml_tensor * k,
|
||||
struct ggml_tensor * v,
|
||||
struct ggml_tensor * mask,
|
||||
float scale);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_flash_attn_back(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * q,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue