threadpool: use relaxed order for chunk sync
A full memory barrier is overkill here, since each thread works on a different chunk.
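For readers less familiar with C11 atomics: the plain calls default to memory_order_seq_cst, which on most targets costs a full fence, while the _explicit variants with memory_order_relaxed keep the operation atomic but drop the ordering guarantee. A minimal illustration of the difference (not part of this commit):

    #include <stdatomic.h>

    static atomic_int counter;

    static void demo(void) {
        // The plain form is shorthand for the _explicit form with seq_cst:
        atomic_store(&counter, 0);
        atomic_store_explicit(&counter, 0, memory_order_seq_cst);

        // Relaxed keeps atomicity (no torn stores, fetch_add stays indivisible)
        // but makes no ordering promise with respect to other memory accesses.
        atomic_store_explicit(&counter, 0, memory_order_relaxed);
        (void) atomic_fetch_add_explicit(&counter, 1, memory_order_relaxed);
    }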
This commit is contained in:
parent db45b6d3a9
commit 307fece5d7
1 changed file with 6 additions and 2 deletions
@@ -88,6 +88,10 @@ typedef enum {
 static void atomic_store(atomic_int * ptr, LONG val) {
     InterlockedExchange(ptr, val);
 }
+static void atomic_store_explicit(atomic_int * ptr, LONG val, memory_order mo) {
+    // TODO: add support for explicit memory order
+    InterlockedExchange(ptr, val);
+}
 static LONG atomic_load(atomic_int * ptr) {
     return InterlockedCompareExchange(ptr, 0, 0);
 }
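The TODO in the new shim means the relaxed hint is ignored on the MSVC fallback path and a full-barrier InterlockedExchange is still issued. One possible shape for honoring it later is sketched below; this is not part of the commit, it reuses the file's atomic_int and memory_order typedefs, and the use of InterlockedExchangeNoFence is an assumption (Windows 8 or later) rather than something the project has adopted:

    // Hypothetical follow-up sketch only, not from this commit.
    static void atomic_store_explicit(atomic_int * ptr, LONG val, memory_order mo) {
        if (mo == memory_order_relaxed) {
            InterlockedExchangeNoFence(ptr, val); // atomic store, no full fence (assumed available)
        } else {
            InterlockedExchange(ptr, val);        // conservative full barrier, as before
        }
    }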
@@ -12472,7 +12476,7 @@ UseGgmlGemm1:;
 
     if (ith == 0) {
         // Every thread starts at ith, so the first unprocessed chunk is nth. This save a bit of coordination right at the start.
-        atomic_store(&params->threadpool->current_chunk, nth);
+        atomic_store_explicit(&params->threadpool->current_chunk, nth, memory_order_relaxed);
     }
 
     ggml_barrier(params->threadpool);
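To make the comment above concrete (the numbers are illustrative, not from the source): with nth = 4 threads, each thread i processes chunk i without touching the counter, so thread 0 seeds current_chunk to 4 and the first atomic increment hands out chunk 4. The relaxed store is safe because ggml_barrier() runs before any thread reads the counter again, so the barrier's synchronization publishes the value; the store itself only needs atomicity. A small sketch of the seeding step in isolation, with hypothetical names:

    #include <stdatomic.h>

    // Hypothetical helper, not ggml code: seed the shared chunk counter.
    static void seed_chunk_counter(atomic_int * current_chunk, int ith, int nth) {
        if (ith == 0) {
            // Relaxed is enough: the caller's thread barrier orders this store
            // before any thread's first fetch_add on the counter.
            atomic_store_explicit(current_chunk, nth, memory_order_relaxed);
        }
        // the caller then waits on its barrier (ggml_barrier in ggml)
    }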
@@ -12583,7 +12587,7 @@ UseGgmlGemm2:;
             break;
         }
 
-        current_chunk = atomic_fetch_add(&params->threadpool->current_chunk, 1);
+        current_chunk = atomic_fetch_add_explicit(&params->threadpool->current_chunk, 1, memory_order_relaxed);
     }
 }
 
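Putting the two call sites together, the pattern is: thread 0 seeds the counter with a relaxed store, every thread meets at ggml_barrier(), and chunks are then claimed with a relaxed fetch_add, whose read-modify-write atomicity alone guarantees that no two threads receive the same index. Below is a minimal standalone sketch of that pattern using C11 atomics and a POSIX pthread barrier; every name and number is illustrative, and none of it is ggml code:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NTHREADS 4
    #define NCHUNKS  16

    static atomic_int        current_chunk;
    static pthread_barrier_t barrier;

    static void * worker(void * arg) {
        const int ith = (int)(intptr_t) arg;
        if (ith == 0) {
            // Every thread starts at its own index, so the first unclaimed
            // chunk is NTHREADS; relaxed is enough because the barrier below
            // orders this store before anyone's first fetch_add.
            atomic_store_explicit(&current_chunk, NTHREADS, memory_order_relaxed);
        }
        pthread_barrier_wait(&barrier);

        for (int chunk = ith; chunk < NCHUNKS;
             chunk = atomic_fetch_add_explicit(&current_chunk, 1, memory_order_relaxed)) {
            printf("thread %d -> chunk %d\n", ith, chunk); // stand-in for real work
        }
        return NULL;
    }

    int main(void) {
        pthread_t threads[NTHREADS];
        pthread_barrier_init(&barrier, NULL, NTHREADS);
        for (int i = 0; i < NTHREADS; i++) {
            pthread_create(&threads[i], NULL, worker, (void *)(intptr_t) i);
        }
        for (int i = 0; i < NTHREADS; i++) {
            pthread_join(threads[i], NULL);
        }
        pthread_barrier_destroy(&barrier);
        return 0;
    }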