Merge branch 'master' into compilade/batch-splits

This commit is contained in:
Francis Couture-Harpin 2024-07-28 01:20:13 -04:00
commit 0dea4263aa
85 changed files with 2886 additions and 6522 deletions

View file

@ -254,18 +254,8 @@
// GGML_PAD(x, n): round x up to the next multiple of n.
// The result is computed with a bit mask, so it is only meaningful when n is a power of two.
// n appears twice in the expansion: do not pass an expression with side effects.
#define GGML_PAD(x, n) (((x) + ((n) - 1)) & ~((n) - 1))
// GGML_ASSERT(x): abort the process when the condition x evaluates to false.
// On failure the expansion, in this order:
//   1. flushes stdout,
//   2. writes "GGML_ASSERT: <file>:<line>: <text of x>" to stderr,
//   3. calls ggml_print_backtrace(),
//   4. calls abort().
// The do { ... } while (0) wrapper makes the expansion a single statement, so the macro
// can be used as the body of an unbraced if/else.
// Keep comments outside the macro body: a // comment placed before a trailing backslash
// would swallow the following line.
#define GGML_ASSERT(x) \
do { \
if (!(x)) { \
fflush(stdout); \
fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
ggml_print_backtrace(); \
abort(); \
} \
} while (0)
#ifndef NDEBUG
#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
#elif defined(__GNUC__)
#define GGML_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
@ -274,6 +264,17 @@
#define GGML_UNREACHABLE() ((void) 0)
#endif
// GGML_NORETURN: declaration prefix for functions that never return to their caller.
//   C++             -> the standard [[noreturn]] attribute
//   C, non-MSVC     -> the C11 _Noreturn function specifier
//   C, MSVC         -> __declspec(noreturn)
#if defined(__cplusplus)
#define GGML_NORETURN [[noreturn]]
#elif !defined(_MSC_VER)
#define GGML_NORETURN _Noreturn
#else
#define GGML_NORETURN __declspec(noreturn)
#endif
// GGML_ABORT(fmt, ...): call ggml_abort() with the file and line of the call site followed by
// the caller's format string and arguments. At least the format string must be supplied.
#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)

// GGML_ASSERT(x): when x evaluates to false, abort via GGML_ABORT with the message
// "GGML_ASSERT(<text of x>) failed". x is evaluated exactly once.
// The expansion is wrapped in do { ... } while (0) so that it is a single statement:
// with a bare `if`, code such as `if (c) GGML_ASSERT(x); else other();` would silently
// attach the `else` to the macro's hidden `if` instead of to `if (c)`.
#define GGML_ASSERT(x) \
    do { \
        if (!(x)) { \
            GGML_ABORT("GGML_ASSERT(%s) failed", #x); \
        } \
    } while (0)
// used to copy the number of elements and stride in bytes of tensors into local variables.
// main purpose is to reduce code duplication and improve readability.
//
@ -322,6 +323,9 @@
extern "C" {
#endif
// Fatal-error entry point used by the GGML_ABORT macro; declared GGML_NORETURN, i.e. it does
// not return to the caller.
//   file, line - location of the failure (GGML_ABORT passes __FILE__ and __LINE__)
//   fmt, ...   - format string and its arguments describing the failure
// NOTE(review): GGML_ATTRIBUTE_FORMAT(3, 4) presumably asks the compiler to check the variadic
// arguments against fmt printf-style (fmt is parameter 3, the arguments start at 4) - confirm
// against the macro's definition.
GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
enum ggml_status {
GGML_STATUS_ALLOC_FAILED = -2,
GGML_STATUS_FAILED = -1,
@ -636,8 +640,11 @@ extern "C" {
GGML_CGRAPH_EVAL_ORDER_COUNT
};
// element type of the bitset referenced by ggml_hash_set.used (one 32-bit word)
typedef uint32_t ggml_bitset_t;
// Hash set of tensor pointers (used by ggml_cgraph to track visited tensors).
struct ggml_hash_set {
// NOTE(review): presumably the number of slots in keys - confirm against the implementation
size_t size;
// NOTE(review): presumably a bitset with one bit per slot marking which entries of keys are in use - confirm
ggml_bitset_t * used;
// array of tensor pointers stored in the set
struct ggml_tensor ** keys;
};
@ -651,7 +658,7 @@ extern "C" {
struct ggml_tensor ** grads;
struct ggml_tensor ** leafs;
struct ggml_hash_set visited_hash_table;
struct ggml_hash_set visited_hash_set;
enum ggml_cgraph_eval_order order;
};
@ -698,8 +705,6 @@ extern "C" {
GGML_API int64_t ggml_cycles(void);
GGML_API int64_t ggml_cycles_per_ms(void);
GGML_API void ggml_print_backtrace(void);
// accepts a UTF-8 path, even on Windows
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
@ -2002,8 +2007,8 @@ extern "C" {
// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
GGML_API enum ggml_status ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
// same as ggml_graph_compute() but the work data is allocated as a part of the context
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
@ -2397,6 +2402,7 @@ extern "C" {
GGML_API int ggml_cpu_has_vsx (void);
GGML_API int ggml_cpu_has_matmul_int8(void);
GGML_API int ggml_cpu_has_cann (void);
GGML_API int ggml_cpu_has_llamafile (void);
//
// Internal types and functions exposed for tests and benchmarks