ggml : full ALiBi support (#7192)
* ggml : full ALiBi support
* ggml : update ggml_soft_max_ext() CUDA, SYCL
* ggml : ggml_flash_attn_ext() support ALiBi (CPU)
* ggml : ggml_flash_attn_ext() support ALiBi (Metal)
* ggml : fix warning
* ggml : ggml_flash_attn_ext() support ALiBi (CUDA)

ggml-ci

* ggml : fix assert message
* vulkan : add dev notes
* ggml : require mask when using ALiBi

ggml-ci

* convert : fix convert for refact models
This commit is contained in:
parent
e849648888
commit
9cb317f77e
16 changed files with 350 additions and 825 deletions
18
ggml.h
18
ggml.h
|
@ -468,7 +468,6 @@ extern "C" {
|
|||
GGML_OP_SOFT_MAX_BACK,
|
||||
GGML_OP_ROPE,
|
||||
GGML_OP_ROPE_BACK,
|
||||
GGML_OP_ALIBI,
|
||||
GGML_OP_CLAMP,
|
||||
GGML_OP_CONV_TRANSPOSE_1D,
|
||||
GGML_OP_IM2COL,
|
||||
|
@ -1428,15 +1427,13 @@ extern "C" {
|
|||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
|
||||
// fused soft_max(a*scale + mask*(ALiBi slope))
|
||||
// mask is optional
|
||||
// pos is required when max_bias > 0.0f
|
||||
// max_bias = 0.0f for no ALiBi
|
||||
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * mask,
|
||||
struct ggml_tensor * pos,
|
||||
float scale,
|
||||
float max_bias);
|
||||
|
||||
|
@ -1538,16 +1535,6 @@ extern "C" {
|
|||
float xpos_base,
|
||||
bool xpos_down);
|
||||
|
||||
// alibi position embedding
|
||||
// in-place, returns view(a)
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int n_past,
|
||||
int n_head,
|
||||
float bias_max),
|
||||
"use ggml_soft_max_ext instead (will be removed in Mar 2024)");
|
||||
|
||||
// clamp
|
||||
// in-place, returns view(a)
|
||||
GGML_API struct ggml_tensor * ggml_clamp(
|
||||
|
@ -1744,7 +1731,8 @@ extern "C" {
|
|||
struct ggml_tensor * k,
|
||||
struct ggml_tensor * v,
|
||||
struct ggml_tensor * mask,
|
||||
float scale);
|
||||
float scale,
|
||||
float max_bias);
|
||||
|
||||
GGML_API void ggml_flash_attn_ext_set_prec(
|
||||
struct ggml_tensor * a,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue