Avoid INIT synchronization barrier when possible

This change makes inference go ~5% faster for me.
This commit is contained in:
Justine Tunney 2024-05-22 00:31:28 -07:00
parent 7deec14bd9
commit 8435ab0ae8
No known key found for this signature in database
GPG key ID: 52965314629936D4

2
ggml.c
View file

@@ -20013,6 +20013,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
}
}
if (GGML_OP_HAS_INIT[node->op]) {
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
task_phase = GGML_TASK_TYPE_COMPUTE;
atomic_store(&state->shared->n_active, n_threads);
@@ -20027,6 +20028,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
const bool do_yield = node_n < 0 || cgraph->nodes[node_n]->op == GGML_OP_MUL_MAT;
ggml_graph_compute_thread_sync_task(&task_phase, state, do_yield);
}
}
if (state->ith < n_tasks) {
params.type = GGML_TASK_TYPE_COMPUTE;