Merge branch 'master' into compilade/mamba2
commit 7d6cb36895

105 changed files with 8055 additions and 5231 deletions
@@ -66,6 +66,7 @@ extern "C" {
     // "offset" refers to the offset of the tensor data for setting/getting data
     GGML_API GGML_CALL void ggml_backend_tensor_set(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
+    GGML_API GGML_CALL void ggml_backend_tensor_memset(   struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);

     GGML_API void ggml_backend_synchronize(ggml_backend_t backend);

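Reviewer note, not part of the diff: the new ggml_backend_tensor_memset follows the same offset/size convention as the existing set/get accessors. A minimal sketch, assuming `t` is a tensor already allocated in a backend buffer; the helper name is ours:

```c
#include "ggml.h"
#include "ggml-backend.h"

// Hypothetical helper: round-trip data through a backend tensor `t` that
// was allocated in a backend buffer (e.g. via ggml_backend_alloc_ctx_tensors).
static void roundtrip(struct ggml_tensor * t) {
    float src[16], dst[16];
    for (int i = 0; i < 16; i++) src[i] = (float) i;

    // write 16 floats starting at byte offset 0 of the tensor data
    ggml_backend_tensor_set(t, src, 0, sizeof(src));

    // read them back
    ggml_backend_tensor_get(t, dst, 0, sizeof(dst));

    // zero the same region with the newly added memset (value is a byte pattern)
    ggml_backend_tensor_memset(t, 0, 0, sizeof(src));
}
```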
@@ -122,7 +123,7 @@ extern "C" {
     // The backend registry is a registry of all the available backends, and allows initializing backends in a generic way

     GGML_API size_t         ggml_backend_reg_get_count(void);
-    GGML_API size_t         ggml_backend_reg_find_by_name(const char * name);
+    GGML_API size_t         ggml_backend_reg_find_by_name(const char * name); // returns index of backend with name, or SIZE_MAX if not found
     GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is backend_name:params (params is optional)
     GGML_API const char *   ggml_backend_reg_get_name(size_t i);
     GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific

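Reviewer note, not part of the diff: the new comment documents the SIZE_MAX sentinel, which callers should check before indexing into the registry. A minimal sketch; the helper name is ours:

```c
#include <stdint.h> // SIZE_MAX
#include <stdio.h>
#include "ggml-backend.h"

// Initialize a backend by registry name, or return NULL if it is not registered.
static ggml_backend_t backend_init_by_name(const char * name) {
    size_t i = ggml_backend_reg_find_by_name(name);
    if (i == SIZE_MAX) {
        fprintf(stderr, "backend '%s' not found\n", name);
        return NULL;
    }
    return ggml_backend_reg_init_backend(i, /*params =*/ NULL);
}
```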
@@ -25,9 +25,6 @@
 #include <stddef.h>
 #include <stdbool.h>

-// max memory buffers that can be mapped to the device
-#define GGML_METAL_MAX_BUFFERS 64
-
 struct ggml_tensor;
 struct ggml_cgraph;

@@ -48,8 +45,6 @@ GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);

 GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);

-GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
-
 GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);

 GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);

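Reviewer note, not part of the diff: with ggml_backend_metal_set_n_cb removed, the abort callback remains the hook for cancelling long computations; it is polled during graph evaluation and aborts when it returns true. A sketch; the flag and wiring are ours:

```c
#include <stdbool.h>
#include "ggml-metal.h"

static volatile bool g_should_abort = false; // set from another thread or a signal handler

static bool abort_cb(void * user_data) {
    (void) user_data;
    return g_should_abort; // true => abort ggml_graph_compute
}

// wiring, after creating the backend:
//   ggml_backend_t backend = ggml_backend_metal_init();
//   ggml_backend_metal_set_abort_callback(backend, abort_cb, NULL);
```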
@@ -229,14 +229,16 @@
 #define GGML_MAX_PARAMS         2048
 #define GGML_MAX_CONTEXTS       64
 #define GGML_MAX_SRC            10
-#ifndef GGML_MAX_NAME
-#define GGML_MAX_NAME           64
-#endif
+#define GGML_MAX_N_THREADS      512
 #define GGML_MAX_OP_PARAMS      64

+#ifndef GGML_MAX_NAME
+#    define GGML_MAX_NAME       64
+#endif
+
 #define GGML_DEFAULT_N_THREADS  4
 #define GGML_DEFAULT_GRAPH_SIZE 2048

 #if UINTPTR_MAX == 0xFFFFFFFF
 #define GGML_MEM_ALIGN 4
 #else
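Reviewer note, not part of the diff: moving GGML_MAX_NAME into its own #ifndef block preserves the ability to raise the tensor-name limit at build time without editing the header. For example:

```c
// compile ggml with a larger tensor-name limit:
//   cc -DGGML_MAX_NAME=128 -c ggml.c
#include "ggml.h"
_Static_assert(GGML_MAX_NAME >= 64, "code below assumes at least 64-byte names");
```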
@@ -259,21 +261,21 @@
 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))

 #ifndef NDEBUG
-#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
+#    define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
 #elif defined(__GNUC__)
-#define GGML_UNREACHABLE() __builtin_unreachable()
+#    define GGML_UNREACHABLE() __builtin_unreachable()
 #elif defined(_MSC_VER)
-#define GGML_UNREACHABLE() __assume(0)
+#    define GGML_UNREACHABLE() __assume(0)
 #else
-#define GGML_UNREACHABLE() ((void) 0)
+#    define GGML_UNREACHABLE() ((void) 0)
 #endif

 #ifdef __cplusplus
-#define GGML_NORETURN [[noreturn]]
+#    define GGML_NORETURN [[noreturn]]
 #elif defined(_MSC_VER)
-#define GGML_NORETURN __declspec(noreturn)
+#    define GGML_NORETURN __declspec(noreturn)
 #else
-#define GGML_NORETURN _Noreturn
+#    define GGML_NORETURN _Noreturn
 #endif

 #define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
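Reviewer note, not part of the diff: this hunk only re-indents the `#    define` bodies; behavior is unchanged. For reference, the typical use of GGML_UNREACHABLE() is to mark a switch arm that is impossible by construction, so debug builds abort loudly and release builds can optimize. The function name below is ours:

```c
#include "ggml.h"

// Size in bytes of one element for a couple of non-quantized types.
static int type_size_bytes(enum ggml_type type) {
    switch (type) {
        case GGML_TYPE_F32: return 4;
        case GGML_TYPE_F16: return 2;
        default:
            GGML_UNREACHABLE(); // debug: abort with a message; release: optimizer hint
    }
}
```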
@@ -534,6 +536,7 @@ extern "C" {
         GGML_OP_CROSS_ENTROPY_LOSS,
         GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+        GGML_OP_OPT_STEP_ADAMW,

         GGML_OP_COUNT,
     };

@@ -569,12 +572,15 @@ extern "C" {
         GGML_LOG_LEVEL_WARN  = 2,
         GGML_LOG_LEVEL_ERROR = 3,
         GGML_LOG_LEVEL_DEBUG = 4,
+        GGML_LOG_LEVEL_CONT  = 5, // continue previous log
     };

+    // this tensor...
     enum ggml_tensor_flag {
-        GGML_TENSOR_FLAG_INPUT  = 1,
-        GGML_TENSOR_FLAG_OUTPUT = 2,
-        GGML_TENSOR_FLAG_PARAM  = 4,
+        GGML_TENSOR_FLAG_INPUT  = 1, // ...is an input for the GGML compute graph
+        GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph
+        GGML_TENSOR_FLAG_PARAM  = 4, // ...contains trainable parameters
+        GGML_TENSOR_FLAG_LOSS   = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
     };

     // n-dimensional tensor
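Reviewer note, not part of the diff: these flags are normally set through the corresponding helper functions rather than by writing `tensor->flags` directly; ggml_set_loss is added further down in this same merge. A sketch, with function and tensor names ours:

```c
#include "ggml.h"

static void mark_graph_tensors(struct ggml_context * ctx,
                               struct ggml_tensor * x,      // network input
                               struct ggml_tensor * w,      // trainable weight
                               struct ggml_tensor * y,      // network output
                               struct ggml_tensor * loss) { // scalar loss
    ggml_set_input (x);      // GGML_TENSOR_FLAG_INPUT
    ggml_set_param (ctx, w); // GGML_TENSOR_FLAG_PARAM
    ggml_set_output(y);      // GGML_TENSOR_FLAG_OUTPUT
    ggml_set_loss  (loss);   // GGML_TENSOR_FLAG_LOSS
}
```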
@@ -1404,14 +1410,14 @@ extern "C" {
     // supports 3D: a->ne[2] == b->ne[1]
     GGML_API struct ggml_tensor * ggml_get_rows(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * a,  // data
+            struct ggml_tensor  * b); // row indices

     GGML_API struct ggml_tensor * ggml_get_rows_back(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * c);
+            struct ggml_tensor  * a,  // gradients of ggml_get_rows result
+            struct ggml_tensor  * b,  // row indices
+            struct ggml_tensor  * c); // data for ggml_get_rows, only used for its shape

     GGML_API struct ggml_tensor * ggml_diag(
             struct ggml_context * ctx,
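Reviewer note, not part of the diff: the new parameter comments match the most common use of ggml_get_rows, an embedding-style lookup. A sketch, with names ours:

```c
#include "ggml.h"

// One row of `emb` is gathered per index in `ids`.
static struct ggml_tensor * embed_tokens(struct ggml_context * ctx,
                                         struct ggml_tensor * emb,   // [n_embd, n_vocab]
                                         struct ggml_tensor * ids) { // [n_tokens], GGML_TYPE_I32
    return ggml_get_rows(ctx, emb, ids);
}
```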
@@ -1562,9 +1568,9 @@ extern "C" {
     // a - dy
     GGML_API struct ggml_tensor * ggml_rope_back(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * c,
+            struct ggml_tensor  * a, // gradients of ggml_rope result
+            struct ggml_tensor  * b, // positions
+            struct ggml_tensor  * c, // freq factors
             int                   n_dims,
             int                   mode,
             int                   n_ctx_orig,
@@ -1978,6 +1984,9 @@ extern "C" {
     typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
     typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);

+#define GGML_N_TASKS_MAX (-1)
+    // n_tasks == GGML_N_TASKS_MAX means to use max number of tasks
+
     GGML_API struct ggml_tensor * ggml_map_custom1(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
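Reviewer note, not part of the diff: GGML_N_TASKS_MAX plugs into the `n_tasks` argument of the ggml_map_custom* family. A sketch of a custom unary op that splits work row-cyclically across the `nth` tasks; the op and names are ours:

```c
#include "ggml.h"

static void negate_rows(struct ggml_tensor * dst, const struct ggml_tensor * a,
                        int ith, int nth, void * userdata) {
    (void) userdata;
    const int64_t nr = ggml_nrows(dst);
    for (int64_t r = ith; r < nr; r += nth) { // each task takes every nth row
        const float * src = (const float *) ((const char *) a->data   + r * a->nb[1]);
        float       * out = (float *)       ((char *)       dst->data + r * dst->nb[1]);
        for (int64_t i = 0; i < dst->ne[0]; i++) {
            out[i] = -src[i];
        }
    }
}

// usage:
//   struct ggml_tensor * y = ggml_map_custom1(ctx, x, negate_rows, GGML_N_TASKS_MAX, NULL);
```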
@@ -2029,33 +2038,55 @@ extern "C" {
     // loss function

     GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // logits
+            struct ggml_tensor  * b); // labels

     GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * c);
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // logits
+            struct ggml_tensor  * b,  // labels
+            struct ggml_tensor  * c); // gradients of cross_entropy_loss result

+    // AdamW optimizer step
+    // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
+    // PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
+    GGML_API struct ggml_tensor * ggml_opt_step_adamw(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * grad,
+            float                 alpha,
+            float                 beta1,
+            float                 beta2,
+            float                 eps,
+            float                 wd); // weight decay
+
     //
     // automatic differentiation
     //

-    GGML_API void ggml_set_param(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * tensor);
+    GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API void ggml_set_loss(struct ggml_tensor * tensor);

     GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
-    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
+    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool accumulate);
+
+    GGML_API void ggml_build_opt_adamw(
+            struct ggml_context * ctx,
+            struct ggml_cgraph  * gf,
+            struct ggml_cgraph  * gb,
+            float                 alpha,
+            float                 beta1,
+            float                 beta2,
+            float                 eps,
+            float                 wd); // weight decay

     // graph allocation in a context
     GGML_API struct ggml_cgraph * ggml_new_graph        (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
     GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
     GGML_API struct ggml_cgraph * ggml_graph_dup        (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
     GGML_API void                 ggml_graph_cpy        (struct ggml_cgraph * src, struct ggml_cgraph * dst);
-    GGML_API void                 ggml_graph_reset      (struct ggml_cgraph * cgraph); // zero grads
+    GGML_API void                 ggml_graph_reset      (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
     GGML_API void                 ggml_graph_clear      (struct ggml_cgraph * cgraph);

     GGML_API int                  ggml_graph_size       (struct ggml_cgraph * cgraph);
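Reviewer note, not part of the diff: taken together, the additions in this hunk sketch out a training setup — mark parameters and a loss, expand the backward graph, append AdamW steps, and reset gradients and momenta between epochs. A minimal sketch under those assumptions; the hyperparameter values are ours, following the cited PyTorch defaults:

```c
#include "ggml.h"

static void build_training_graphs(struct ggml_context * ctx,
                                  struct ggml_tensor * logits,
                                  struct ggml_tensor * labels,
                                  struct ggml_tensor * weights) {
    ggml_set_param(ctx, weights); // mark as trainable

    struct ggml_tensor * loss = ggml_cross_entropy_loss(ctx, logits, labels);
    ggml_set_loss(loss);

    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
    ggml_build_forward_expand(gf, loss);

    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
    ggml_build_backward_expand(ctx, gf, gb, /*accumulate =*/ false);

    // append AdamW optimizer steps for the parameters to gb
    ggml_build_opt_adamw(ctx, gf, gb,
            /*alpha =*/ 1e-3f, /*beta1 =*/ 0.9f, /*beta2 =*/ 0.999f,
            /*eps   =*/ 1e-8f, /*wd    =*/ 0.0f);

    // between epochs: zero grads and momenta, set the loss grad to 1
    ggml_graph_reset(gb);
}
```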
@@ -2481,6 +2512,9 @@ extern "C" {
     GGML_API int ggml_cpu_has_cann     (void);
     GGML_API int ggml_cpu_has_llamafile(void);

+    // get the sve vector length in bytes
+    GGML_API int ggml_cpu_get_sve_cnt(void);
+
     //
     // Internal types and functions exposed for tests and benchmarks
     //

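Reviewer note, not part of the diff: like the ggml_cpu_has_* queries above it, the new SVE query is a plain runtime probe. Sketch:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // runtime feature probes; the SVE vector length is reported in bytes
    printf("llamafile: %d\n", ggml_cpu_has_llamafile());
    printf("sve bytes: %d\n", ggml_cpu_get_sve_cnt());
    return 0;
}
```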