ggml : graph allocation in contexts

parent eb542d3932
commit 59e808b49b

2 changed files with 118 additions and 70 deletions
ggml.c (128 changed lines)

@@ -4071,8 +4071,8 @@ bool ggml_is_numa(void) {
 ////////////////////////////////////////////////////////////////////////////////

 void ggml_print_object(const struct ggml_object * obj) {
-    GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
-            obj->offs, obj->size, (const void *) obj->next);
+    GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
+            obj->type, obj->offs, obj->size, (const void *) obj->next);
 }

 void ggml_print_objects(const struct ggml_context * ctx) {

@@ -4212,7 +4212,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
 }

 size_t ggml_tensor_overhead(void) {
-    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16;
+    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE; // REVIEW: i don't think we need to 16 here because GGML_OBJECT_SIZE and GGML_TENSOR_SIZE are already aligned
 }

 bool ggml_is_transposed(const struct ggml_tensor * tensor) {

@@ -4472,6 +4472,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
     struct ggml_object * obj = ctx->objects_begin;

     while (obj != NULL) {
+        if (obj->type == GGML_OBJECT_TENSOR) {
         struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);

         const size_t size = ggml_nbytes(tensor);

@@ -4479,6 +4480,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
         if (max_size < size) {
             max_size = size;
         }
+        }

         obj = obj->next;
     }

@@ -4509,12 +4511,7 @@ static void ggml_scratch_load(struct ggml_context * ctx) {

 ////////////////////////////////////////////////////////////////////////////////

-static struct ggml_tensor * ggml_new_tensor_impl(
-        struct ggml_context * ctx,
-        enum ggml_type type,
-        int n_dims,
-        const int64_t* ne,
-        void* data) {
+static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
     // always insert objects at the end of the context's memory pool
     struct ggml_object * obj_cur = ctx->objects_end;

@@ -4522,62 +4519,29 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
     const size_t cur_end  = cur_offs + cur_size;

-    size_t size_needed = 0;
-
-    if (data == NULL && !ctx->no_alloc) {
-        size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
-        for (int i = 1; i < n_dims; i++) {
-            size_needed *= ne[i];
-        }
-        // align to GGML_MEM_ALIGN
-        size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
-    }
+    size_t size_needed = size;
+
+    // align to GGML_MEM_ALIGN
+    size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;

     char * const mem_buffer = ctx->mem_buffer;
     struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);

-    if (ctx->scratch.data == NULL || data != NULL) {
-        size_needed += GGML_TENSOR_SIZE;
-
-        if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
-            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-                    __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
-            assert(false);
-            return NULL;
-        }
-
-        *obj_new = (struct ggml_object) {
-            .offs = cur_end + GGML_OBJECT_SIZE,
-            .size = size_needed,
-            .next = NULL,
-        };
-    } else {
-        if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
-            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
-                    __func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
-            assert(false);
-            return NULL;
-        }
-
-        if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
-            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-                    __func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
-            assert(false);
-            return NULL;
-        }
-
-        data = (char * const) ctx->scratch.data + ctx->scratch.offs;
-
-        *obj_new = (struct ggml_object) {
-            .offs = cur_end + GGML_OBJECT_SIZE,
-            .size = GGML_TENSOR_SIZE,
-            .next = NULL,
-        };
-
-        //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
-
-        ctx->scratch.offs += size_needed;
-    }
+    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
+        GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                __func__, cur_end + size_needed, ctx->mem_size);
+        assert(false);
+        return NULL;
+    }
+
+    *obj_new = (struct ggml_object) {
+        .type = type,
+        .offs = cur_end + GGML_OBJECT_SIZE,
+        .size = size_needed,
+        .next = NULL,
+    };
+
+    ggml_assert_aligned(mem_buffer + obj_new->offs);

     if (obj_cur != NULL) {
         obj_cur->next = obj_new;

@@ -4590,9 +4554,46 @@ static struct ggml_tensor * ggml_new_tensor_impl(

     //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);

-    struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offs);
-
-    ggml_assert_aligned(result);
+    return obj_new;
+}
+
+static struct ggml_tensor * ggml_new_tensor_impl(
+        struct ggml_context * ctx,
+        enum ggml_type type,
+        int n_dims,
+        const int64_t* ne,
+        void* data) {
+
+    size_t data_size = 0;
+
+    if (data == NULL && !ctx->no_alloc) {
+        data_size += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
+        for (int i = 1; i < n_dims; i++) {
+            data_size *= ne[i];
+        }
+    }
+
+    if (ctx->scratch.data != NULL && data == NULL) {
+        // allocate tensor data on scratch buffer
+        if (ctx->scratch.offs + data_size > ctx->scratch.size) {
+            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+                    __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
+            assert(false);
+            return NULL;
+        }
+
+        data = (char * const) ctx->scratch.data + ctx->scratch.offs;
+
+        ctx->scratch.offs += data_size;
+
+        data_size = 0;
+    }
+
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
+
+    // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
+
+    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);

     *result = (struct ggml_tensor) {
         /*.type         =*/ type,

@@ -5026,10 +5027,12 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
     char * const mem_buffer = ctx->mem_buffer;

     while (obj != NULL) {
+        if (obj->type == GGML_OBJECT_TENSOR) {
         struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
         if (strcmp(cur->name, name) == 0) {
             return cur;
         }
+        }

         obj = obj->next;
     }

@@ -15829,6 +15832,35 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep) {
     return result;
 }

+struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
+    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
+
+    *cgraph = (struct ggml_cgraph) {
+        /*.n_nodes      =*/ 0,
+        /*.n_leafs      =*/ 0,
+        /*.nodes        =*/ { NULL },
+        /*.grads        =*/ { NULL },
+        /*.leafs        =*/ { NULL },
+        /*.hash_table   =*/ { NULL },
+        /*.perf_runs    =*/ 0,
+        /*.perf_cycles  =*/ 0,
+        /*.perf_time_us =*/ 0,
+    };
+
+    return cgraph;
+}
+
+struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
+    struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
+    ggml_build_forward_impl(cgraph, tensor, false);
+    return cgraph;
+}
+
+size_t ggml_graph_overhead(void) {
+    return GGML_OBJECT_SIZE + GGML_GRAPH_SIZE + 16;
+}
+
 //
 // thread data
 //
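With these additions, a compute graph is allocated as a GGML_OBJECT_GRAPH object inside a ggml_context, next to the tensor objects it refers to. A minimal usage sketch of the new API (not part of the diff; the buffer size, tensor shapes, and values are illustrative, and the work-buffer handling follows the ggml_graph_plan()/ggml_graph_compute() contract described in ggml.h):

#include "ggml.h"

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    // one pool for the tensor data, the tensor objects, and the graph object
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    ggml_set_f32(a, 2.0f);
    ggml_set_f32(b, 3.0f);

    struct ggml_tensor * c = ggml_add(ctx, a, b);

    // the graph lives inside ctx, allocated via ggml_new_object(ctx, GGML_OBJECT_GRAPH, ...)
    struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx, c);

    struct ggml_cplan plan = ggml_graph_plan(gf, GGML_DEFAULT_N_THREADS);
    if (plan.work_size > 0) {
        // when plan.work_size > 0, the caller must allocate plan.work_data
        plan.work_data = (uint8_t *) malloc(plan.work_size);
    }
    ggml_graph_compute(gf, &plan);
    free(plan.work_data);

    printf("c[0] = %f\n", ggml_get_f32_1d(c, 0)); // expect 5.0

    ggml_free(ctx);
    return 0;
}

ggml_build_forward_ctx() is simply ggml_new_graph() followed by expanding the graph from the given tensor, so the whole lifetime of the graph is tied to the context.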
ggml.h (20 changed lines)

@@ -396,6 +396,12 @@ extern "C" {
         GGML_UNARY_OP_SILU,
     };

+    enum ggml_object_type {
+        GGML_OBJECT_TENSOR,
+        GGML_OBJECT_GRAPH,
+        GGML_OBJECT_WORK_BUFFER
+    };
+
     // ggml object
     struct ggml_object {
         size_t offs;

@@ -403,7 +409,9 @@ extern "C" {

         struct ggml_object * next;

-        char padding[8];
+        enum ggml_object_type type;
+
+        char padding[4];
     };

     static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);

@@ -424,7 +432,7 @@ extern "C" {
         enum ggml_op op;

         // op params - allocated as int32_t for alignment
-        int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(uint32_t)];
+        int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];

         bool is_param;

@@ -485,6 +493,8 @@ extern "C" {
         int64_t perf_time_us;
     };

+    static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
+
     // scratch buffer
     struct ggml_scratch {
         size_t offs;

@@ -1391,11 +1401,17 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * tensor);

     GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);

     GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
     GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);

+    // graph allocation in a context
+    GGML_API struct ggml_cgraph * ggml_new_graph        (struct ggml_context * ctx);
+    GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API size_t ggml_graph_overhead(void);
+
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
     GGML_API struct ggml_cplan ggml_graph_plan   (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
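Together with ggml_tensor_overhead(), the new ggml_graph_overhead() lets a caller size a context precisely when the tensor data lives outside of it. A rough sketch under that assumption; new_meta_ctx is a hypothetical helper used only for illustration, not part of the ggml API:

#include "ggml.h"

// Hypothetical helper: a metadata-only context that can hold up to n_tensors
// tensor objects plus one compute graph. With no_alloc set, each tensor costs
// only ggml_tensor_overhead() (object header + tensor struct, no data), and
// the graph itself costs ggml_graph_overhead().
static struct ggml_context * new_meta_ctx(int n_tensors) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ (size_t) n_tensors*ggml_tensor_overhead() + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true, // tensor data is not allocated in this context
    };
    return ggml_init(params);
}

This works because, with no_alloc set and no scratch buffer, ggml_new_tensor_impl() passes only GGML_TENSOR_SIZE to ggml_new_object(), so the per-tensor cost is exactly the object header plus the tensor struct.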