fixed bad memory access exception on ios 17

parent c47066d833
commit 4c3a7cdd46

3 changed files with 90 additions and 61 deletions
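A note on the pattern, before the diffs: every hunk below replaces a compound-literal assignment such as `*alloc = (struct ggml_allocr){ ... };` with direct field assignments plus `memset` calls. A compiler may materialize a compound literal as a temporary object the size of the whole struct, typically on the stack, and `ggml_allocr`, `ggml_context`, and `ggml_cgraph` all embed arrays large enough that such a temporary can overrun the small stacks iOS gives secondary threads. That stack-overflow reading of the iOS 17 crash is an inference from the change, not something the commit message states. A minimal sketch of the two styles, using a hypothetical `big_state` struct (not a name from the tree):

    #include <stdlib.h>
    #include <string.h>

    // Hypothetical stand-in for the large ggml structs patched below
    // (ggml_allocr, ggml_context, ggml_cgraph all embed big arrays).
    struct big_state {
        int    n_items;
        void * table[65536];   // ~512 KB embedded directly in the struct
    };

    struct big_state * init_before(void) {
        struct big_state * s = malloc(sizeof(struct big_state));
        // Old style: the compound literal may be built as a stack
        // temporary of sizeof(struct big_state), then copied into *s.
        *s = (struct big_state){ /*.n_items =*/ 0, /*.table =*/ { NULL } };
        return s;
    }

    struct big_state * init_after(void) {
        struct big_state * s = malloc(sizeof(struct big_state));
        // New style (what this commit does): assign scalars directly and
        // zero the large arrays in place; no big temporary is created.
        s->n_items = 0;
        memset(s->table, 0, sizeof(s->table));
        return s;
    }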
ggml-alloc.c (52 changes)

@@ -291,21 +291,17 @@ void ggml_allocr_reset(struct ggml_allocr * alloc) {
 struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment) {
     struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr) /* + n_free_blocks * sizeof(struct free_block) */);
 
-    *alloc = (struct ggml_allocr){
-        /*.data          = */ data,
-        /*.size          = */ size,
-        /*.alignment     = */ alignment,
-        /*.n_free_blocks = */ 0,
-        /*.free_blocks   = */ {{0}},
-        /*.hash_table    = */ {{0}},
-        /*.max_size      = */ 0,
-        /*.measure       = */ false,
-        /*.parse_seq     = */ {0},
-        /*.parse_seq_len = */ 0,
-#ifdef GGML_ALLOCATOR_DEBUG
-        /*.allocated_tensors = */ {0},
-#endif
-    };
+    (*alloc).data = data;
+    (*alloc).size = size;
+    (*alloc).alignment = alignment;
+    (*alloc).n_free_blocks = 0;
+    (*alloc).max_size = 0;
+    (*alloc).measure = false;
+    (*alloc).parse_seq_len = 0;
+
+    memset((*alloc).free_blocks, 0, sizeof((*alloc).free_blocks));
+    memset((*alloc).hash_table, 0, sizeof((*alloc).hash_table));
+    memset((*alloc).parse_seq, 0, sizeof((*alloc).parse_seq));
 
     ggml_allocr_reset(alloc);
@@ -371,21 +367,21 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
 
     alloc_measure_vmem(&base_addr, &size);
 
-    *alloc = (struct ggml_allocr){
-        /*.data          = */ base_addr,
-        /*.size          = */ size,
-        /*.alignment     = */ alignment,
-        /*.n_free_blocks = */ 0,
-        /*.free_blocks   = */ {{0}},
-        /*.hash_table    = */ {{0}},
-        /*.max_size      = */ 0,
-        /*.measure       = */ true,
-        /*.parse_seq     = */ {0},
-        /*.parse_seq_len = */ 0,
+    (*alloc).data = base_addr;
+    (*alloc).size = size;
+    (*alloc).alignment = alignment;
+    (*alloc).n_free_blocks = 0;
+    (*alloc).max_size = 0;
+    (*alloc).measure = true;
+    (*alloc).parse_seq_len = 0;
+
+    memset((*alloc).free_blocks, 0, sizeof((*alloc).free_blocks));
+    memset((*alloc).hash_table, 0, sizeof((*alloc).hash_table));
+    memset((*alloc).parse_seq, 0, sizeof((*alloc).parse_seq));
+
 #ifdef GGML_ALLOCATOR_DEBUG
-        /*.allocated_tensors = */ {0},
+    memset((*alloc).allocated_tensors, 0, sizeof((*alloc).allocated_tensors));
 #endif
-    };
 
     ggml_allocr_reset(alloc);
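A note on swapping `{{0}}` initializers for `memset`: zero-initialization and an all-zero-bytes fill agree for the integer fields, and on the platforms ggml targets an all-bits-zero pointer also reads back as NULL (the C standard does not strictly promise this, but mainstream ABIs do). A small self-check sketch, with a local mock of the free-block entry rather than the real ggml-alloc type:

    #include <assert.h>
    #include <stddef.h>
    #include <string.h>

    // Mock of ggml-alloc's free-block entry, just for the comparison.
    struct free_block { void * addr; size_t size; };

    int main(void) {
        struct free_block a[4] = {{0}};   // what the old initializer produced
        struct free_block b[4];
        memset(b, 0, sizeof(b));          // what the new code produces
        // Same bytes, and on mainstream ABIs the zeroed pointers are NULL.
        assert(memcmp(a, b, sizeof(a)) == 0);
        assert(b[0].addr == NULL);
        return 0;
    }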
ggml.c (49 changes)

@@ -4723,18 +4723,20 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
 
     const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
 
-    *ctx = (struct ggml_context) {
-        /*.mem_size         =*/ mem_size,
-        /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size),
-        /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
-        /*.no_alloc         =*/ params.no_alloc,
-        /*.no_alloc_save    =*/ params.no_alloc,
-        /*.n_objects        =*/ 0,
-        /*.objects_begin    =*/ NULL,
-        /*.objects_end      =*/ NULL,
-        /*.scratch          =*/ { 0, 0, NULL, },
-        /*.scratch_save     =*/ { 0, 0, NULL, },
-    };
+    ctx = (struct ggml_context *)malloc(sizeof(struct ggml_context));
+
+    struct ggml_scratch empty_scratch = { 0, 0, NULL };
+
+    (*ctx).mem_size = mem_size;
+    (*ctx).mem_buffer = params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size);
+    (*ctx).mem_buffer_owned = params.mem_buffer ? false : true;
+    (*ctx).no_alloc = params.no_alloc;
+    (*ctx).no_alloc_save = params.no_alloc;
+    (*ctx).n_objects = 0;
+    (*ctx).objects_begin = NULL;
+    (*ctx).objects_end = NULL;
+    (*ctx).scratch = empty_scratch;
+    (*ctx).scratch_save = empty_scratch;
 
     GGML_ASSERT(ctx->mem_buffer != NULL);
@@ -18079,18 +18081,17 @@ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
     struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
     struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
 
-    *cgraph = (struct ggml_cgraph) {
-        /*.n_nodes      =*/ 0,
-        /*.n_leafs      =*/ 0,
-        /*.nodes        =*/ { NULL },
-        /*.grads        =*/ { NULL },
-        /*.leafs        =*/ { NULL },
-        /*.hash_table   =*/ { NULL },
-        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
-        /*.perf_runs    =*/ 0,
-        /*.perf_cycles  =*/ 0,
-        /*.perf_time_us =*/ 0,
-    };
+    (*cgraph).n_nodes = 0;
+    (*cgraph).n_leafs = 0;
+    (*cgraph).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
+    (*cgraph).perf_runs = 0;
+    (*cgraph).perf_cycles = 0;
+    (*cgraph).perf_time_us = 0;
+
+    memset((*cgraph).nodes, 0, sizeof((*cgraph).nodes));
+    memset((*cgraph).grads, 0, sizeof((*cgraph).grads));
+    memset((*cgraph).leafs, 0, sizeof((*cgraph).leafs));
+    memset((*cgraph).visited_hash_table, 0, sizeof((*cgraph).visited_hash_table));
 
     return cgraph;
 }
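For orientation, here is a minimal caller of the two functions patched above, assuming the `ggml.h` API of this era of the tree (`ggml_init`, `ggml_new_graph`, `ggml_build_forward_expand`); the sketch is illustrative, not from the commit:

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,    // ggml_init allocates the buffer
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);   // patched above

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

        struct ggml_cgraph * gf = ggml_new_graph(ctx);   // patched above
        ggml_build_forward_expand(gf, ggml_add(ctx, a, b));

        ggml_free(ctx);
        return 0;
    }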
llama.cpp (48 changes)

@@ -8222,7 +8222,21 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
         const size_t elt_size = ggml_element_size(kv_self.k);
 
         ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-        ggml_cgraph gf{};
+
+        // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
+        struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));
+
+        (*gf).n_nodes = 0;
+        (*gf).n_leafs = 0;
+        (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
+        (*gf).perf_runs = 0;
+        (*gf).perf_cycles = 0;
+        (*gf).perf_time_us = 0;
+
+        memset((*gf).nodes, 0, sizeof((*gf).nodes));
+        memset((*gf).grads, 0, sizeof((*gf).grads));
+        memset((*gf).leafs, 0, sizeof((*gf).leafs));
+        memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));
 
         ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
         std::vector<uint8_t> kout3d_data(ggml_nbytes(kout3d), 0);
@@ -8240,9 +8254,9 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
             kv_head, n_embd, n_layer,
             elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);
 
-        ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d));
-        ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d));
-        ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
+        ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, k3d, kout3d));
+        ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, v3d, vout3d));
+        ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
 
         ggml_free(cpy_ctx);
@@ -8250,6 +8264,10 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
         // write them to file
         data_ctx->write(kout3d_data.data(), kout3d_data.size());
         data_ctx->write(vout3d_data.data(), vout3d_data.size());
+
+        // free our allocated graph
+        free(gf);
+        gf = NULL;
     }
 
     for (uint32_t i = 0; i < kv_size; ++i) {
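The hunks above lean on a pattern worth spelling out: a tiny `no_alloc` context yields tensor headers without backing memory, and `->data` is then pointed at caller-owned buffers before the copy graph runs (`kin3d->data = (void *) inp;` below does exactly this). A hedged sketch of just that pattern, with an illustrative buffer name:

    #include "ggml.h"

    // external_buf must hold at least ggml_nbytes(t) bytes.
    static void wrap_external_buffer(void * external_buf) {
        struct ggml_init_params p = {
            /*.mem_size   =*/ 4096,   // metadata only, same budget as above
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ true,   // do not allocate tensor data
        };
        struct ggml_context * cpy_ctx = ggml_init(p);

        struct ggml_tensor * t = ggml_new_tensor_1d(cpy_ctx, GGML_TYPE_F32, 16);
        t->data = external_buf;      // header in cpy_ctx, bytes owned by caller

        /* ... build and run a copy graph against t here ... */

        ggml_free(cpy_ctx);
    }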
@@ -8350,7 +8368,21 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
         const size_t elt_size = ggml_element_size(kv_self.k);
 
         ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-        ggml_cgraph gf{};
+
+        // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
+        struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));
+
+        (*gf).n_nodes = 0;
+        (*gf).n_leafs = 0;
+        (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
+        (*gf).perf_runs = 0;
+        (*gf).perf_cycles = 0;
+        (*gf).perf_time_us = 0;
+
+        memset((*gf).nodes, 0, sizeof((*gf).nodes));
+        memset((*gf).grads, 0, sizeof((*gf).grads));
+        memset((*gf).leafs, 0, sizeof((*gf).leafs));
+        memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));
 
         ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
         kin3d->data = (void *) inp;
@@ -8368,9 +8400,9 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
             kv_head, n_embd, n_layer,
             elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);
 
-        ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d));
-        ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d));
-        ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
+        ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, kin3d, k3d));
+        ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, vin3d, v3d));
+        ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
 
         ggml_free(cpy_ctx);
     }
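One simplification the llama.cpp hunks stop short of: since every value they store is zero, and `(*gf).order` can be set explicitly without relying on the enum's numeric value, the six assignments and four memsets collapse into a single memset over the whole struct. A sketch of that equivalent form, with a hypothetical helper name (the commit inlines this block instead):

    #include <stdlib.h>
    #include <string.h>
    #include "ggml.h"

    // Hypothetical helper equivalent to the inline block in the hunks above.
    static struct ggml_cgraph * scratch_graph_new(void) {
        struct ggml_cgraph * gf = (struct ggml_cgraph *) malloc(sizeof(struct ggml_cgraph));
        if (gf == NULL) {
            return NULL;
        }
        // One memset zeroes nodes/grads/leafs/visited_hash_table and all
        // counters, covering every field the commit clears individually.
        memset(gf, 0, sizeof(*gf));
        gf->order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
        return gf;
    }

The caller pairs this with `free(gf)` once `ggml_graph_compute_helper` returns, as the first llama.cpp hunk does.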