Merge branch 'master' into gg/flash-attn

This commit is contained in:
Georgi Gerganov 2024-02-12 21:16:58 +02:00
commit 6875997fd6
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
84 changed files with 7764 additions and 14303 deletions

32
ggml.h
View file

@ -506,11 +506,17 @@ extern "C" {
enum ggml_log_level {
GGML_LOG_LEVEL_ERROR = 2,
GGML_LOG_LEVEL_WARN = 3,
GGML_LOG_LEVEL_INFO = 4,
GGML_LOG_LEVEL_WARN = 3,
GGML_LOG_LEVEL_INFO = 4,
GGML_LOG_LEVEL_DEBUG = 5
};
enum ggml_tensor_flag {
GGML_TENSOR_FLAG_INPUT = 1,
GGML_TENSOR_FLAG_OUTPUT = 2,
GGML_TENSOR_FLAG_PARAM = 4,
};
// ggml object
struct ggml_object {
size_t offs;
@ -544,7 +550,7 @@ extern "C" {
// op params - allocated as int32_t for alignment
int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
bool is_param;
int32_t flags;
struct ggml_tensor * grad;
struct ggml_tensor * src[GGML_MAX_SRC];
@ -568,6 +574,11 @@ extern "C" {
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
// Abort callback
// If not NULL, called before ggml computation
// If it returns true, the computation is aborted
typedef bool (*ggml_abort_callback)(void * data);
// the compute plan that needs to be prepared for ggml_graph_compute()
// since https://github.com/ggerganov/ggml/issues/287
struct ggml_cplan {
@ -577,8 +588,8 @@ extern "C" {
int n_threads;
// abort ggml_graph_compute when true
bool (*abort_callback)(void * data);
void * abort_callback_data;
ggml_abort_callback abort_callback;
void * abort_callback_data;
};
enum ggml_cgraph_eval_order {
@ -2107,6 +2118,12 @@ extern "C" {
ggml_opt_callback callback,
void * callback_data);
//
// tensor flags
//
GGML_API void ggml_set_input(struct ggml_tensor * tensor);
GGML_API void ggml_set_output(struct ggml_tensor * tensor);
//
// quantization
//
@ -2293,6 +2310,7 @@ extern "C" {
GGML_API int ggml_cpu_has_ssse3 (void);
GGML_API int ggml_cpu_has_sycl (void);
GGML_API int ggml_cpu_has_vsx (void);
GGML_API int ggml_cpu_has_matmul_int8(void);
//
// Internal types and functions exposed for tests and benchmarks
@ -2306,7 +2324,8 @@ extern "C" {
#endif
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
typedef void (*ggml_vec_dot_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
const void * GGML_RESTRICT y, size_t by, int nrc);
typedef struct {
const char * type_name;
@ -2318,6 +2337,7 @@ extern "C" {
ggml_from_float_t from_float_reference;
ggml_vec_dot_t vec_dot;
enum ggml_type vec_dot_type;
int64_t nrows; // number of rows to process simultaneously;
} ggml_type_traits_t;
GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);