Merge branch 'master' into gg/flash-attn

This commit is contained in:
Georgi Gerganov 2024-02-19 12:58:18 +02:00
commit 31109ca00a
No known key found for this signature in database
GPG key ID: BF970631944C16B7
87 changed files with 5115 additions and 1531 deletions

27
ggml.h
View file

@ -354,6 +354,7 @@ extern "C" {
GGML_TYPE_IQ2_XXS = 16,
GGML_TYPE_IQ2_XS = 17,
GGML_TYPE_IQ3_XXS = 18,
GGML_TYPE_IQ1_S = 19,
GGML_TYPE_I8,
GGML_TYPE_I16,
GGML_TYPE_I32,
@ -391,6 +392,7 @@ extern "C" {
GGML_FTYPE_MOSTLY_IQ2_XXS = 15, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ2_XS = 16, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ1_S = 18, // except 1d tensors
};
// available tensor operations:
@ -659,6 +661,16 @@ extern "C" {
void * wdata;
};
// numa strategies
enum ggml_numa_strategy {
GGML_NUMA_STRATEGY_DISABLED = 0,
GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
GGML_NUMA_STRATEGY_ISOLATE = 2,
GGML_NUMA_STRATEGY_NUMACTL = 3,
GGML_NUMA_STRATEGY_MIRROR = 4,
GGML_NUMA_STRATEGY_COUNT
};
// misc
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
@ -669,7 +681,7 @@ extern "C" {
GGML_API void ggml_print_backtrace(void);
GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
GGML_API void ggml_print_object (const struct ggml_object * obj);
@ -1374,13 +1386,17 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
// fused soft_max(a*scale + mask)
// fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
// mask is optional
// pos is required when max_bias > 0.0f
// max_bias = 0.0f for no ALiBi
GGML_API struct ggml_tensor * ggml_soft_max_ext(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * mask,
float scale);
struct ggml_tensor * pos,
float scale,
float max_bias);
GGML_API struct ggml_tensor * ggml_soft_max_back(
struct ggml_context * ctx,
@ -1482,12 +1498,13 @@ extern "C" {
// alibi position embedding
// in-place, returns view(a)
GGML_API struct ggml_tensor * ggml_alibi(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
struct ggml_context * ctx,
struct ggml_tensor * a,
int n_past,
int n_head,
float bias_max);
float bias_max),
"use ggml_soft_max_ext instead (will be removed in Mar 2024)");
// clamp
// in-place, returns view(a)