Speed up unnamed POSIX semaphores

When sem_wait() used its futexes, it would always use process-shared mode,
which can be problematic on platforms like Windows, where that causes it
to use the slow futex polyfill. Now, when sem_init() is called in private
mode, that'll be passed along so we can use a faster WaitOnAddress() call.
This commit is contained in:
Justine Tunney 2024-09-13 06:25:27 -07:00
parent b5fcb59a85
commit 462ba6909e
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
5 changed files with 76 additions and 30 deletions

View file

@ -18,6 +18,7 @@
*/
#include "libc/assert.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/strace.h"
#include "libc/limits.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/semaphore.h"
@ -40,14 +41,20 @@
* @raise EBUSY if `sem` has waiters
*/
int sem_destroy(sem_t *sem) {
int waiters;
int rc, waiters;
npassert(sem->sem_magic != SEM_MAGIC_NAMED);
if (sem->sem_magic != SEM_MAGIC_UNNAMED)
return einval();
if (sem->sem_magic != SEM_MAGIC_UNNAMED) {
rc = einval();
} else {
waiters = atomic_load_explicit(&sem->sem_waiters, memory_order_relaxed);
unassert(waiters >= 0);
if (waiters)
return ebusy();
if (waiters) {
rc = ebusy();
} else {
atomic_store_explicit(&sem->sem_value, INT_MIN, memory_order_relaxed);
return 0;
rc = 0;
}
}
STRACE("sem_destroy(%p) → %d% m", sem, rc);
return rc;
}

View file

@ -19,6 +19,7 @@
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/strace.h"
#include "libc/limits.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/semaphore.h"
@ -37,12 +38,17 @@
* @raise EINVAL if `value` exceeds `SEM_VALUE_MAX`
*/
int sem_init(sem_t *sem, int pshared, unsigned value) {
if (value > SEM_VALUE_MAX)
return einval();
int rc;
if (value > SEM_VALUE_MAX) {
rc = einval();
} else {
sem->sem_magic = SEM_MAGIC_UNNAMED;
atomic_store_explicit(&sem->sem_value, value, memory_order_relaxed);
sem->sem_pshared = !!pshared;
sem->sem_pid = getpid();
sem->sem_waiters = 0;
return 0;
rc = 0;
}
STRACE("sem_init(%p, %hhhd, %u) → %d% m", sem, pshared, value, rc);
return rc;
}

View file

@ -46,7 +46,7 @@ int sem_post(sem_t *sem) {
old = atomic_fetch_add_explicit(&sem->sem_value, 1, memory_order_acq_rel);
unassert(old > INT_MIN);
if (old >= 0) {
wakeups = nsync_futex_wake_(&sem->sem_value, 1, true);
wakeups = nsync_futex_wake_(&sem->sem_value, 1, sem->sem_pshared);
npassert(wakeups >= 0);
rc = 0;
} else {

View file

@ -59,7 +59,7 @@ static void sem_timedwait_cleanup(void *arg) {
* @cancelationpoint
*/
int sem_timedwait(sem_t *sem, const struct timespec *abstime) {
int i, v, rc, e = errno;
int v, rc, e = errno;
#if 0
if (IsXnuSilicon() && sem->sem_magic == SEM_MAGIC_KERNEL) {
@ -103,17 +103,14 @@ int sem_timedwait(sem_t *sem, const struct timespec *abstime) {
}
#endif
for (i = 0; i < 7; ++i) {
rc = sem_trywait(sem);
if (!rc) {
return rc;
} else if (errno == EAGAIN) {
errno = e;
sem_delay(i);
} else {
return rc;
}
}
BEGIN_CANCELATION_POINT;
unassert(atomic_fetch_add_explicit(&sem->sem_waiters, +1,
@ -122,7 +119,8 @@ int sem_timedwait(sem_t *sem, const struct timespec *abstime) {
do {
if (!(v = atomic_load_explicit(&sem->sem_value, memory_order_relaxed))) {
rc = nsync_futex_wait_(&sem->sem_value, v, true, CLOCK_REALTIME, abstime);
rc = nsync_futex_wait_(&sem->sem_value, v, sem->sem_pshared,
CLOCK_REALTIME, abstime);
if (rc == -EINTR || rc == -ECANCELED) {
errno = -rc;
rc = -1;

View file

@ -0,0 +1,35 @@
#include <pthread.h>
#include <semaphore.h>
#include <stdlib.h>
// number of worker threads that main() creates and joins
#define THREADS 10
// number of wait / increment / post rounds performed by each worker
#define ITERATIONS 100000
// counter bumped by every worker; main() expects THREADS * ITERATIONS
int g_count;
// unnamed semaphore each worker waits on before touching g_count
sem_t g_sem;
/**
 * Thread entry point that bumps the shared counter ITERATIONS times.
 *
 * Every `++g_count` is bracketed by sem_wait() and sem_post() on
 * `g_sem`. Since main() initializes the semaphore with a value of 1,
 * it behaves like a lock around the non-atomic increment.
 *
 * @param arg is ignored
 * @return always NULL; the whole process exits with status 6 if
 *     sem_wait() fails, or status 7 if sem_post() fails
 */
void *worker(void *arg) {
  (void)arg;
  for (int i = 0; i < ITERATIONS; ++i) {
    if (sem_wait(&g_sem)) {
      exit(6);
    }
    ++g_count;
    if (sem_post(&g_sem)) {
      exit(7);
    }
  }
  return NULL;
}
/**
 * Stress-tests an unnamed semaphore shared by several threads.
 *
 * Steps, each with its own failure exit code:
 *
 * 1. sem_init() on `g_sem` with pshared=0 and an initial value of 1
 * 2. pthread_create() for each of the THREADS workers
 * 3. pthread_join() on each worker
 * 4. verify that `g_count` equals THREADS * ITERATIONS
 * 5. sem_destroy() on `g_sem`
 *
 * @return 0 on success, otherwise the number of the step that failed
 */
int main(int argc, char *argv[]) {
  pthread_t threads[THREADS];
  int t;

  if (sem_init(&g_sem, 0, 1) != 0) {
    return 1;
  }
  for (t = 0; t < THREADS; ++t) {
    if (pthread_create(&threads[t], 0, worker, 0) != 0) {
      return 2;
    }
  }
  for (t = 0; t < THREADS; ++t) {
    if (pthread_join(threads[t], 0) != 0) {
      return 3;
    }
  }
  if (g_count != THREADS * ITERATIONS) {
    return 4;
  }
  if (sem_destroy(&g_sem) != 0) {
    return 5;
  }
  return 0;
}