ggml : hide ggml_object, ggml_cgraph, ggml_hash_set
ggml-ci
This commit is contained in:
parent
00ba2ff781
commit
c8a3f291fe
13 changed files with 131 additions and 92 deletions
|
@ -358,6 +358,7 @@ extern "C" {
|
|||
|
||||
struct ggml_object;
|
||||
struct ggml_context;
|
||||
struct ggml_cgraph;
|
||||
|
||||
// NOTE: always add types at the end of the enum to keep backward compatibility
|
||||
enum ggml_type {
|
||||
|
@ -575,23 +576,9 @@ extern "C" {
|
|||
GGML_TENSOR_FLAG_PARAM = 4,
|
||||
};
|
||||
|
||||
// ggml object
|
||||
struct ggml_object {
|
||||
size_t offs;
|
||||
size_t size;
|
||||
|
||||
struct ggml_object * next;
|
||||
|
||||
enum ggml_object_type type;
|
||||
|
||||
char padding[4];
|
||||
};
|
||||
|
||||
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
|
||||
|
||||
// n-dimensional tensor
|
||||
struct ggml_tensor {
|
||||
enum ggml_type type;
|
||||
enum ggml_type type;
|
||||
|
||||
GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
|
||||
|
||||
|
@ -655,7 +642,7 @@ extern "C" {
|
|||
|
||||
struct ggml_threadpool; // forward declaration, see ggml.c
|
||||
|
||||
typedef struct ggml_threadpool * ggml_threadpool_t;
|
||||
typedef struct ggml_threadpool * ggml_threadpool_t;
|
||||
|
||||
// the compute plan that needs to be prepared for ggml_graph_compute()
|
||||
// since https://github.com/ggerganov/ggml/issues/287
|
||||
|
@ -671,35 +658,6 @@ extern "C" {
|
|||
void * abort_callback_data;
|
||||
};
|
||||
|
||||
enum ggml_cgraph_eval_order {
|
||||
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
|
||||
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
|
||||
GGML_CGRAPH_EVAL_ORDER_COUNT
|
||||
};
|
||||
|
||||
typedef uint32_t ggml_bitset_t;
|
||||
|
||||
struct ggml_hash_set {
|
||||
size_t size;
|
||||
ggml_bitset_t * used; // whether or not the keys are in use i.e. set
|
||||
struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
|
||||
};
|
||||
|
||||
// computation graph
|
||||
struct ggml_cgraph {
|
||||
int size;
|
||||
int n_nodes;
|
||||
int n_leafs;
|
||||
|
||||
struct ggml_tensor ** nodes;
|
||||
struct ggml_tensor ** grads;
|
||||
struct ggml_tensor ** leafs;
|
||||
|
||||
struct ggml_hash_set visited_hash_set;
|
||||
|
||||
enum ggml_cgraph_eval_order order;
|
||||
};
|
||||
|
||||
// scratch buffer
|
||||
struct ggml_scratch {
|
||||
size_t offs;
|
||||
|
@ -2017,8 +1975,6 @@ extern "C" {
|
|||
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
|
||||
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
|
||||
|
||||
#define GGML_N_TASKS_MAX -1
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom1(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
|
@ -2088,30 +2044,35 @@ extern "C" {
|
|||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * tensor);
|
||||
|
||||
|
||||
GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
|
||||
GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
|
||||
|
||||
// graph allocation in a context
|
||||
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
|
||||
GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
|
||||
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
||||
GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
|
||||
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
|
||||
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
|
||||
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
|
||||
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
|
||||
GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
|
||||
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
||||
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
|
||||
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
|
||||
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
|
||||
|
||||
GGML_API int ggml_graph_size (struct ggml_cgraph * cgraph);
|
||||
GGML_API struct ggml_tensor * ggml_graph_node (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
|
||||
GGML_API struct ggml_tensor ** ggml_graph_nodes (struct ggml_cgraph * cgraph);
|
||||
GGML_API int ggml_graph_n_nodes(struct ggml_cgraph * cgraph);
|
||||
|
||||
GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
|
||||
|
||||
GGML_API size_t ggml_graph_overhead(void);
|
||||
GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
|
||||
|
||||
GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
|
||||
GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params *p, int n_threads);
|
||||
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params *p0, const struct ggml_threadpool_params *p1);
|
||||
GGML_API struct ggml_threadpool* ggml_threadpool_new (struct ggml_threadpool_params * params);
|
||||
GGML_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
|
||||
GGML_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
|
||||
GGML_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
|
||||
GGML_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
|
||||
GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
|
||||
GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
|
||||
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
|
||||
GGML_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
|
||||
GGML_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
|
||||
GGML_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
|
||||
GGML_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
|
||||
GGML_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
|
||||
|
||||
// ggml_graph_plan() has to be called before ggml_graph_compute()
|
||||
// when plan.work_size > 0, caller must allocate memory for plan.work_data
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include "ggml-impl.h"
|
||||
#include "ggml-blas.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include "ggml-cuda.h"
|
||||
#include "ggml.h"
|
||||
#include "ggml-impl.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
|
||||
#include "ggml-cuda/common.cuh"
|
||||
|
|
|
@ -629,8 +629,16 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
|
|||
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
||||
#endif
|
||||
|
||||
enum ggml_cgraph_eval_order {
|
||||
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
|
||||
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
|
||||
GGML_CGRAPH_EVAL_ORDER_COUNT
|
||||
};
|
||||
|
||||
// bitset
|
||||
|
||||
typedef uint32_t ggml_bitset_t;
|
||||
|
||||
static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
|
||||
#define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
|
||||
#define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
|
||||
|
@ -656,6 +664,12 @@ static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
|
|||
#define GGML_HASHSET_FULL ((size_t)-1)
|
||||
#define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)
|
||||
|
||||
struct ggml_hash_set {
|
||||
size_t size;
|
||||
ggml_bitset_t * used; // whether or not the keys are in use i.e. set
|
||||
struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
|
||||
};
|
||||
|
||||
struct ggml_hash_set ggml_hash_set_new(size_t size);
|
||||
void ggml_hash_set_free(struct ggml_hash_set * hash_set);
|
||||
|
||||
|
@ -745,6 +759,24 @@ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct g
|
|||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
// computation graph
|
||||
|
||||
struct ggml_cgraph {
|
||||
int size;
|
||||
int n_nodes;
|
||||
int n_leafs;
|
||||
|
||||
struct ggml_tensor ** nodes;
|
||||
struct ggml_tensor ** grads;
|
||||
struct ggml_tensor ** leafs;
|
||||
|
||||
struct ggml_hash_set visited_hash_set;
|
||||
|
||||
enum ggml_cgraph_eval_order order;
|
||||
};
|
||||
|
||||
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#import "ggml-metal.h"
|
||||
|
||||
#import "ggml-impl.h"
|
||||
#import "ggml-backend-impl.h"
|
||||
#import "ggml.h"
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
|
@ -882,7 +882,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||
// create multiple command buffers and enqueue them
|
||||
// then, we encode the graph into the command buffers in parallel
|
||||
|
||||
const int n_nodes = gf->n_nodes;
|
||||
const int n_nodes = gf->n_nodes;
|
||||
const int n_cb = ctx->n_cb;
|
||||
const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;
|
||||
|
||||
|
|
|
@ -287,6 +287,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
|
|||
#define GGML_DEBUG 0
|
||||
#define GGML_GELU_FP16
|
||||
#define GGML_GELU_QUICK_FP16
|
||||
#define GGML_N_TASKS_MAX (-1)
|
||||
|
||||
#define GGML_SOFT_MAX_UNROLL 4
|
||||
#define GGML_VEC_DOT_UNROLL 2
|
||||
|
@ -1893,6 +1894,23 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
|
|||
#define GGML_F16_ARR (GGML_F16_STEP/GGML_F16_EPR)
|
||||
#endif
|
||||
|
||||
//
|
||||
// ggml object
|
||||
//
|
||||
|
||||
struct ggml_object {
|
||||
size_t offs;
|
||||
size_t size;
|
||||
|
||||
struct ggml_object * next;
|
||||
|
||||
enum ggml_object_type type;
|
||||
|
||||
char padding[4];
|
||||
};
|
||||
|
||||
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
|
||||
|
||||
//
|
||||
// ggml context
|
||||
//
|
||||
|
@ -19161,6 +19179,33 @@ void ggml_graph_clear(struct ggml_cgraph * cgraph) {
|
|||
ggml_hash_set_reset(&cgraph->visited_hash_set);
|
||||
}
|
||||
|
||||
int ggml_graph_size(struct ggml_cgraph * cgraph) {
|
||||
return cgraph->size;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
|
||||
if (i < 0) {
|
||||
GGML_ASSERT(cgraph->n_nodes + i >= 0);
|
||||
return cgraph->nodes[cgraph->n_nodes + i];
|
||||
}
|
||||
|
||||
GGML_ASSERT(i < cgraph->n_nodes);
|
||||
return cgraph->nodes[i];
|
||||
}
|
||||
|
||||
struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
|
||||
return cgraph->nodes;
|
||||
}
|
||||
|
||||
int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
|
||||
return cgraph->n_nodes;
|
||||
}
|
||||
|
||||
void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
|
||||
cgraph->nodes[cgraph->n_nodes] = tensor;
|
||||
cgraph->n_nodes++;
|
||||
}
|
||||
|
||||
// Android's libc implementation "bionic" does not support setting affinity
|
||||
#if defined(__gnu_linux__)
|
||||
static void set_numa_thread_affinity(int thread_n) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue