ggml : sync latest (SAM + SD operators, CUDA alibi) (#2709)

* ggml : sync latest (SAM + SD operators, CUDA alibi)

ggml-ci

* ggml : fix tabs
commit ef3f333d37
parent 8e4364f2af
Author: Georgi Gerganov
Date:   2023-08-22 14:22:08 +03:00 (committed by GitHub)
6 changed files with 1090 additions and 61 deletions

ggml.h (115 changes)

@@ -211,6 +211,7 @@
 #define GGML_MAX_OP_PARAMS     32
 #define GGML_DEFAULT_N_THREADS 4
 
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1
@@ -345,10 +346,12 @@ extern "C" {
         GGML_OP_ARGMAX,
         GGML_OP_REPEAT,
         GGML_OP_REPEAT_BACK,
+        GGML_OP_CONCAT,
         GGML_OP_SILU_BACK,
         GGML_OP_NORM, // normalize
         GGML_OP_RMS_NORM,
         GGML_OP_RMS_NORM_BACK,
+        GGML_OP_GROUP_NORM,
 
         GGML_OP_MUL_MAT,
         GGML_OP_OUT_PROD,
@@ -374,14 +377,19 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_1D,
         GGML_OP_CONV_2D,
+        GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
+        GGML_OP_UPSCALE, // nearest interpolate
 
         GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_FF,
         GGML_OP_FLASH_ATTN_BACK,
         GGML_OP_WIN_PART,
         GGML_OP_WIN_UNPART,
+        GGML_OP_GET_REL_POS,
+        GGML_OP_ADD_REL_POS,
 
         GGML_OP_UNARY,
@@ -805,6 +813,13 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    // concat a and b on dim 2
+    // used in stable-diffusion
+    GGML_API struct ggml_tensor * ggml_concat(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_abs(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
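To make the new API concrete, a minimal usage sketch (not from this commit; it assumes an already-initialized ggml_context * ctx, and the shapes are invented):

    // concat two stable-diffusion-style feature maps along dim 2 (channels)
    struct ggml_tensor * t0 = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 64, 320);
    struct ggml_tensor * t1 = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 64, 320);
    struct ggml_tensor * tc = ggml_concat(ctx, t0, t1); // tc->ne = [64, 64, 640, 1]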
@@ -913,6 +928,19 @@ extern "C" {
             struct ggml_tensor  * a,
             float                 eps);
 
+    // group normalize along ne0*ne1*n_groups
+    // used in stable-diffusion
+    // TODO: eps is hardcoded to 1e-6 for now
+    GGML_API struct ggml_tensor * ggml_group_norm(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_groups);
+
+    GGML_API struct ggml_tensor * ggml_group_norm_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_groups);
+
     // a - x
     // b - dy
     // TODO: update with configurable eps
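A sketch of how ggml_group_norm might be called (assumes an initialized ctx; shapes invented, mirroring the GroupNorm(32, ...) layers common in stable-diffusion):

    // normalize a 64x64x320 feature map over 32 channel groups (10 channels each);
    // eps is the hardcoded 1e-6 noted in the TODO above
    struct ggml_tensor * x = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 64, 320);
    struct ggml_tensor * y = ggml_group_norm(ctx, x, 32);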
@@ -1213,6 +1241,15 @@ extern "C" {
             float                 freq_base,
             float                 freq_scale);
 
+    // xPos RoPE, in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_past,
+            int                   n_dims,
+            float                 base,
+            bool                  down);
+
     // rotary position embedding backward, i.e compute dx from dy
     // a - dy
     GGML_API struct ggml_tensor * ggml_rope_back(
@@ -1221,7 +1258,11 @@ extern "C" {
             int                   n_past,
             int                   n_dims,
             int                   mode,
-            int                   n_ctx);
+            int                   n_ctx,
+            float                 freq_base,
+            float                 freq_scale,
+            float                 xpos_base,
+            bool                  xpos_down);
 
     // alibi position embedding
     // in-place, returns view(a)
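A hedged sketch of the xPos variant (assumes an initialized ctx; head size, tensor counts, and the 512.0f base are invented). The down flag flips the xPos scaling so queries are scaled up and keys down, making their dot product decay with relative distance:

    // Q/K tensors of shape [head_dim, n_head, seq_len]
    struct ggml_tensor * q = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 8, 32);
    struct ggml_tensor * k = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 8, 32);
    q = ggml_rope_xpos_inplace(ctx, q, 0, 64, 512.0f, false); // n_past = 0, n_dims = 64
    k = ggml_rope_xpos_inplace(ctx, k, 0, 64, 512.0f, true);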
@@ -1248,6 +1289,15 @@ extern "C" {
             int                   p0,  // padding
             int                   d0); // dilation
 
+    // conv_1d with padding = half
+    // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
+    GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            int                   s,
+            int                   d);
+
     GGML_API struct ggml_tensor * ggml_conv_2d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1259,14 +1309,38 @@ extern "C" {
             int                   d0,
             int                   d1);
 
-    // conv_1d with padding = half
-    // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
-    GGML_API struct ggml_tensor * ggml_conv_1d_ph(
+    // kernel size is a->ne[0] x a->ne[1]
+    // stride is equal to kernel size
+    // padding is zero
+    // example:
+    // a:     16   16    3  768
+    // b:   1024 1024    3    1
+    // res:   64   64  768    1
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s,
-            int                   d);
+            struct ggml_tensor  * b);
+
+    // kernel size is a->ne[0] x a->ne[1]
+    // stride is 1
+    // padding is half
+    // example:
+    // a:      3    3    256  256
+    // b:     64   64    256    1
+    // res:   64   64    256    1
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            int                   stride);
 
     enum ggml_op_pool {
         GGML_OP_POOL_MAX,
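The shapes in the comments above translate into a sketch like this (assumes an initialized ctx; the F16 kernel type is an assumption, matching how ggml conv weights are typically stored):

    // SAM patch embedding: 16x16 kernel, stride = kernel size, no padding
    struct ggml_tensor * w  = ggml_new_tensor_4d(ctx, GGML_TYPE_F16,   16,   16, 3, 768);
    struct ggml_tensor * im = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 1024, 1024, 3,   1);
    struct ggml_tensor * p  = ggml_conv_2d_sk_p0(ctx, w, im); // p->ne = [64, 64, 768, 1]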
@@ -1293,6 +1367,13 @@ extern "C" {
             int                   p0,
             int                   p1);
 
+    // nearest interpolate
+    // used in stable-diffusion
+    GGML_API struct ggml_tensor * ggml_upscale(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   scale_factor);
+
     GGML_API struct ggml_tensor * ggml_flash_attn(
             struct ggml_context * ctx,
             struct ggml_tensor  * q,
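For illustration (assumes an initialized ctx; shapes invented), a 2x nearest-neighbor upscale as used in stable-diffusion upsampling blocks:

    struct ggml_tensor * x  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 32, 32, 640);
    struct ggml_tensor * x2 = ggml_upscale(ctx, x, 2); // x2->ne = [64, 64, 640, 1]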
@@ -1346,6 +1427,27 @@ extern "C" {
             struct ggml_tensor  * a,
             enum ggml_unary_op    op);
 
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_get_rel_pos(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   qh,
+            int                   kh);
+
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_add_rel_pos(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * pw,
+            struct ggml_tensor  * ph);
+
+    GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * pw,
+            struct ggml_tensor  * ph);
+
     // custom operators
 
     typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
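A rough sketch of the SAM use case (assumes an initialized ctx; every name and shape here is invented): rel_w is a learned table with one embedding per relative offset, and a 14x14 attention window has 2*14-1 = 27 possible offsets:

    struct ggml_tensor * rel_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 27);
    // gather one 64-dim embedding for each (query, key) pair of the window
    struct ggml_tensor * rw = ggml_get_rel_pos(ctx, rel_w, 14, 14);
    // ggml_add_rel_pos(_inplace) then folds the projected width/height biases
    // (pw, ph) into the attention scores ahead of the softmax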
@@ -1500,6 +1602,7 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * tensor);
 
     GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
 
     GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
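Finally, a self-contained sketch (not from the commit; all sizes invented) showing how the new ops slot into the usual build/compute cycle visible in the declarations above:

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 64*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        struct ggml_tensor * a = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 8, 8, 4);
        struct ggml_tensor * b = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 8, 8, 4);
        struct ggml_tensor * c = ggml_upscale(ctx, ggml_concat(ctx, a, b), 2);

        struct ggml_cgraph gf = ggml_build_forward(c);
        ggml_graph_compute_with_ctx(ctx, &gf, /*n_threads =*/ 4); // 16x16x8 result in c

        ggml_free(ctx);
        return 0;
    }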