ggml : introduce ggml_status (ggml/750)
* using enum as an exit code instead of macros

* update return type from enum to unsigned int

* indentation fix

* compound update
  ggml_compute_exit_code -> ggml_status
  changed ggml_status from a bit-field type to simple codes
  ggml_status to string cast

* ggml_status to string cast

* GGML_CALL was removed

Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: slaren <slarengh@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
parent fe52be11e3
commit 9fa2627347

11 changed files with 88 additions and 63 deletions
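The net effect for callers: ggml_graph_compute and ggml_graph_compute_with_ctx now return enum ggml_status instead of the old GGML_EXIT_* integer macros, and ggml_status_to_string maps a status to a printable message. A minimal caller-side sketch, assuming the usual ggml setup; the graph-building boilerplate is illustrative, not part of this commit:

#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024, // 16 MB arena for tensors + graph
        .mem_buffer = NULL,
        .no_alloc   = false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // toy graph: c = a + b
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * c = ggml_add(ctx, a, b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    // after this commit the result is a typed status, not an int
    enum ggml_status st = ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/4);
    if (st != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "%s\n", ggml_status_to_string(st));
    }

    ggml_free(ctx);
    return st == GGML_STATUS_SUCCESS ? 0 : 1;
}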
ggml.c | 29
@@ -320,6 +320,16 @@ static ggml_fp16_t ggml_table_exp_f16[1 << 16];
 // precomputed f32 table for f16 (256 KB) (ggml-impl.h)
 float ggml_table_f32_f16[1 << 16];
 
+const char * ggml_status_to_string(enum ggml_status status) {
+    switch (status) {
+        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
+        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
+        case GGML_STATUS_SUCCESS:      return "GGML status: success";
+        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
+        default: GGML_ASSERT(false);
+    }
+}
+
 // note: do not use these inside ggml.c
 // these are meant to be used via the ggml.h API
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
@@ -17400,6 +17410,7 @@ struct ggml_compute_state {
     ggml_thread_t thrd;
     int ith;
     struct ggml_compute_state_shared * shared;
+    enum ggml_status ec;
 };
 
 static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
@@ -17693,7 +17704,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     while (true) {
         if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
             state->shared->node_n += 1;
-            return (thread_ret_t) GGML_EXIT_ABORTED;
+            state->ec = GGML_STATUS_ABORTED;
+            return 0;
         }
 
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
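The hunk above reroutes aborts: instead of returning GGML_EXIT_ABORTED from the worker thread, the status is recorded in state->ec and later surfaced by ggml_graph_compute. A sketch of a cancellable compute built on the existing abort_callback field of ggml_cplan; the helper names here are hypothetical:

#include <stdbool.h>
#include <stdlib.h>
#include "ggml.h"

// hypothetical cancellation flag polled by the workers
static bool should_abort(void * data) {
    return *(volatile bool *) data;
}

static enum ggml_status compute_cancellable(struct ggml_cgraph * gf, int n_threads, volatile bool * stop) {
    struct ggml_cplan cplan = ggml_graph_plan(gf, n_threads);

    cplan.work_data           = malloc(cplan.work_size); // caller-owned work buffer
    cplan.abort_callback      = should_abort;
    cplan.abort_callback_data = (void *) stop;

    // returns GGML_STATUS_ABORTED if should_abort() fired mid-graph
    enum ggml_status st = ggml_graph_compute(gf, &cplan);

    free(cplan.work_data);
    return st;
}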
@@ -17815,7 +17827,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         }
     }
 
-    return GGML_EXIT_SUCCESS;
+    return 0;
 }
 
 struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threads) {
@@ -18011,7 +18023,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
     return cplan;
 }
 
-int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
+enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     {
         GGML_ASSERT(cplan);
         GGML_ASSERT(cplan->n_threads > 0);
@@ -18055,6 +18067,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
             .thrd   = 0,
             .ith    = j,
             .shared = &state_shared,
+            .ec     = GGML_STATUS_SUCCESS,
         };
 
         const int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
@@ -18065,12 +18078,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
 
     workers[0].ith = 0;
     workers[0].shared = &state_shared;
+    workers[0].ec = GGML_STATUS_SUCCESS;
 
     const int64_t perf_start_cycles  = ggml_perf_cycles();
     const int64_t perf_start_time_us = ggml_perf_time_us();
 
     // this is a work thread too
-    int compute_status = (size_t) ggml_graph_compute_thread(&workers[0]);
+    ggml_graph_compute_thread(&workers[0]);
+    enum ggml_status compute_status = workers[0].ec;
 
     // don't leave affinity set on the main thread
     clear_numa_thread_affinity();
@@ -18080,6 +18095,8 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         for (int j = 1; j < n_threads; j++) {
             const int rc = ggml_thread_join(workers[j].thrd, NULL);
             GGML_ASSERT(rc == 0);
+            if (workers[j].ec != GGML_STATUS_SUCCESS)
+                compute_status = workers[j].ec;
         }
     }
 
@@ -18107,14 +18124,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     return compute_status;
 }
 
-void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
+enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
 
     struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
 
     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
 
-    ggml_graph_compute(cgraph, &cplan);
+    return ggml_graph_compute(cgraph, &cplan);
 }
 
 struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
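For downstream code the migration is mechanical: comparisons against the old GGML_EXIT_* macros become comparisons against the enum. A hypothetical before/after fragment, assuming <stdio.h> and a caller that already owns a built graph gf and a valid cplan:

// before: integer exit codes
//   if (ggml_graph_compute(gf, &cplan) != GGML_EXIT_SUCCESS) { /* ... */ }

// after: typed status codes
enum ggml_status st = ggml_graph_compute(gf, &cplan);
if (st == GGML_STATUS_ABORTED) {
    // abort_callback fired; the graph was only partially evaluated
} else if (st != GGML_STATUS_SUCCESS) {
    fprintf(stderr, "compute failed: %s\n", ggml_status_to_string(st));
}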