Make more improvements to threads and mappings

- NetBSD should now have faster synchronization
- POSIX barriers may now be shared across processes
- An edge case with memory map tracking has been fixed
- Grand Central Dispatch is no longer used on macOS ARM64
- POSIX mutexes in normal mode now use futexes across processes (see
  the sketch below)
Justine Tunney 2024-07-24 01:05:00 -07:00
parent 2187d6d2dd
commit e398f3887c
20 changed files with 566 additions and 171 deletions
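
Because normal-mode mutexes marked PTHREAD_PROCESS_SHARED now fall back to a
futex-based lock, a contended mutex in MAP_SHARED memory should sleep in the
kernel rather than spin. A minimal sketch of the usage this enables, written
against plain POSIX APIs (an illustration, not code from this commit):

// Sketch (assumption: portable POSIX calls only): a normal mutex placed in
// anonymous shared memory, marked PTHREAD_PROCESS_SHARED, and locked by a
// parent and its fork()'d child. Contended acquisitions take the futex path.
#include <pthread.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void) {
  pthread_mutex_t *m = mmap(0, sizeof(*m), PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
  pthread_mutexattr_t attr;
  pthread_mutexattr_init(&attr);
  pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
  pthread_mutex_init(m, &attr);
  pthread_mutexattr_destroy(&attr);
  if (!fork()) {
    pthread_mutex_lock(m);   // may block on a futex instead of spinning
    pthread_mutex_unlock(m);
    _exit(0);
  }
  pthread_mutex_lock(m);
  pthread_mutex_unlock(m);
  wait(0);
  pthread_mutex_destroy(m);
  munmap(m, sizeof(*m));
}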


@@ -26,11 +26,9 @@
 int begin_cancelation_point(void) {
   int state = 0;
-  struct CosmoTib *tib;
-  struct PosixThread *pt;
   if (__tls_enabled) {
-    tib = __get_tls();
-    if ((pt = (struct PosixThread *)tib->tib_pthread)) {
+    struct PosixThread *pt;
+    if ((pt = _pthread_self())) {
       state = pt->pt_flags & PT_INCANCEL;
       pt->pt_flags |= PT_INCANCEL;
     }
@@ -39,11 +37,9 @@ int begin_cancelation_point(void) {
 }

 void end_cancelation_point(int state) {
-  struct CosmoTib *tib;
-  struct PosixThread *pt;
   if (__tls_enabled) {
-    tib = __get_tls();
-    if ((pt = (struct PosixThread *)tib->tib_pthread)) {
+    struct PosixThread *pt;
+    if ((pt = _pthread_self())) {
       pt->pt_flags &= ~PT_INCANCEL;
       pt->pt_flags |= state;
     }


@@ -6,6 +6,8 @@
 #include "libc/thread/tls2.internal.h"
 COSMOPOLITAN_C_START_

+#define MAPS_RETRY ((void *)-1)
+
 #define MAP_TREE_CONTAINER(e) TREE_CONTAINER(struct Map, tree, e)

 struct Map {
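
MAPS_RETRY is a sentinel distinct from NULL: judging from the call sites later
in this commit, __maps_alloc() returns it after replenishing its free list from
a freshly mapped page, and the caller is expected to call again (or restart its
tree traversal) rather than treat it as an out-of-memory failure. Here is a
standalone sketch of that allocator pattern, for illustration only (hypothetical
names, not cosmo's internals):

// Toy analogue of the MAPS_RETRY idea: when the free list is empty, carve a
// fresh block into nodes, keep node 0 to describe the block itself, push the
// rest onto the free list, and ask the caller to retry.
#include <stdio.h>
#include <stdlib.h>

#define RETRY ((struct Node *)-1)

struct Node { struct Node *next; };

static struct Node *free_list;

static struct Node *node_alloc(void) {
  if (free_list) {
    struct Node *n = free_list;
    free_list = n->next;
    return n;
  }
  struct Node *block = calloc(64, sizeof(struct Node));
  if (!block) return NULL;            // genuine out-of-memory
  for (int i = 1; i < 64; ++i) {      // node 0 stays owned by the pool
    block[i].next = free_list;
    free_list = &block[i];
  }
  return RETRY;                       // caller should call again
}

int main(void) {
  struct Node *n;
  do n = node_alloc(); while (n == RETRY);
  printf("got node %p\n", (void *)n);
}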


@@ -120,6 +120,7 @@ static int __muntrack(char *addr, size_t size, int pagesz,
   struct Map *map;
   struct Map *next;
   struct Map *floor;
+StartOver:
   floor = __maps_floor(addr);
   for (map = floor; map && map->addr <= addr + size; map = next) {
     next = __maps_next(map);
@@ -148,6 +149,8 @@ static int __muntrack(char *addr, size_t size, int pagesz,
       ASSERT(left > 0);
       struct Map *leftmap;
       if ((leftmap = __maps_alloc())) {
+        if (leftmap == MAPS_RETRY)
+          goto StartOver;
         map->addr += left;
         map->size = right;
         if (!(map->flags & MAP_ANONYMOUS))
@@ -167,6 +170,8 @@ static int __muntrack(char *addr, size_t size, int pagesz,
       size_t right = map_addr + map_size - addr;
       struct Map *rightmap;
       if ((rightmap = __maps_alloc())) {
+        if (rightmap == MAPS_RETRY)
+          goto StartOver;
         map->size = left;
         __maps.pages -= (right + pagesz - 1) / pagesz;
         rightmap->addr = addr;
@@ -184,8 +189,14 @@ static int __muntrack(char *addr, size_t size, int pagesz,
       size_t right = map_size - middle - left;
       struct Map *leftmap;
       if ((leftmap = __maps_alloc())) {
+        if (leftmap == MAPS_RETRY)
+          goto StartOver;
         struct Map *middlemap;
         if ((middlemap = __maps_alloc())) {
+          if (middlemap == MAPS_RETRY) {
+            __maps_free(leftmap);
+            goto StartOver;
+          }
           leftmap->addr = map_addr;
           leftmap->size = left;
           leftmap->off = map->off;
@@ -204,6 +215,7 @@ static int __muntrack(char *addr, size_t size, int pagesz,
           *deleted = middlemap;
           __maps_check();
         } else {
+          __maps_free(leftmap);
           rc = -1;
         }
       } else {
@@ -304,12 +316,11 @@ struct Map *__maps_alloc(void) {
   map->flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK;
   map->hand = sys.maphandle;
   __maps_lock();
-  __maps_insert(map++);
+  __maps_insert(map);
   __maps_unlock();
-  map->addr = MAP_FAILED;
-  for (int i = 1; i < gransz / sizeof(struct Map) - 1; ++i)
+  for (int i = 1; i < gransz / sizeof(struct Map); ++i)
     __maps_free(map + i);
-  return map;
+  return MAPS_RETRY;
 }

 static int __munmap(char *addr, size_t size) {
@@ -396,21 +407,32 @@ void *__maps_pickaddr(size_t size) {
 static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
                           int64_t off, int pagesz, int gransz) {

+  // allocate Map object
+  struct Map *map;
+  do {
+    if (!(map = __maps_alloc()))
+      return MAP_FAILED;
+  } while (map == MAPS_RETRY);
+
   // polyfill nuances of fixed mappings
   int sysflags = flags;
   bool noreplace = false;
   bool should_untrack = false;
   if (flags & MAP_FIXED_NOREPLACE) {
-    if (flags & MAP_FIXED)
+    if (flags & MAP_FIXED) {
+      __maps_free(map);
       return (void *)einval();
+    }
     sysflags &= ~MAP_FIXED_NOREPLACE;
     if (IsLinux()) {
       noreplace = true;
       sysflags |= MAP_FIXED_NOREPLACE_linux;
     } else if (IsFreebsd() || IsNetbsd()) {
       sysflags |= MAP_FIXED;
-      if (__maps_overlaps(addr, size, pagesz))
+      if (__maps_overlaps(addr, size, pagesz)) {
+        __maps_free(map);
         return (void *)eexist();
+      }
     } else {
       noreplace = true;
     }
@@ -418,11 +440,6 @@ static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
     should_untrack = true;
   }

-  // allocate Map object
-  struct Map *map;
-  if (!(map = __maps_alloc()))
-    return MAP_FAILED;
-
   // remove mapping we blew away
   if (IsWindows() && should_untrack)
     __munmap(addr, size);
@@ -572,23 +589,27 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
     return (void *)einval();
   }

+  // allocate object for tracking new mapping
+  struct Map *map;
+  do {
+    if (!(map = __maps_alloc()))
+      return (void *)enomem();
+  } while (map == MAPS_RETRY);
+
   // check old interval is fully contained within one mapping
   struct Map *old_map;
   if (!(old_map = __maps_floor(old_addr)) ||
       old_addr + old_size > old_map->addr + PGUP(old_map->size) ||
-      old_addr < old_map->addr)
+      old_addr < old_map->addr) {
+    __maps_free(map);
     return (void *)efault();
+  }

   // save old properties
   int old_off = old_map->off;
   int old_prot = old_map->prot;
   int old_flags = old_map->flags;

-  // allocate object for tracking new mapping
-  struct Map *map;
-  if (!(map = __maps_alloc()))
-    return (void *)enomem();
-
   // netbsd mremap fixed returns enoent rather than unmapping old pages
   if (IsNetbsd() && (flags & MREMAP_FIXED))
     if (__munmap(new_addr, new_size)) {


@@ -75,6 +75,7 @@ int __mprotect(char *addr, size_t size, int prot) {
     return edeadlk();
   }
   struct Map *map, *floor;
+StartOver:
   floor = __maps_floor(addr);
   for (map = floor; map && map->addr <= addr + size; map = __maps_next(map)) {
     char *map_addr = map->addr;
@@ -93,10 +94,12 @@ int __mprotect(char *addr, size_t size, int prot) {
       }
     } else if (addr <= map_addr) {
       // change lefthand side of mapping
-      size_t left = PGUP(addr + size - map_addr);
+      size_t left = addr + size - map_addr;
       size_t right = map_size - left;
       struct Map *leftmap;
       if ((leftmap = __maps_alloc())) {
+        if (leftmap == MAPS_RETRY)
+          goto StartOver;
         if (!__mprotect_chunk(map_addr, left, prot, false)) {
           leftmap->addr = map_addr;
           leftmap->size = left;
@@ -127,6 +130,8 @@ int __mprotect(char *addr, size_t size, int prot) {
       size_t right = map_addr + map_size - addr;
       struct Map *leftmap;
       if ((leftmap = __maps_alloc())) {
+        if (leftmap == MAPS_RETRY)
+          goto StartOver;
         if (!__mprotect_chunk(map_addr + left, right, prot, false)) {
           leftmap->addr = map_addr;
           leftmap->size = left;
@@ -159,8 +164,14 @@ int __mprotect(char *addr, size_t size, int prot) {
       size_t right = map_size - middle - left;
       struct Map *leftmap;
       if ((leftmap = __maps_alloc())) {
+        if (leftmap == MAPS_RETRY)
+          goto StartOver;
         struct Map *midlmap;
         if ((midlmap = __maps_alloc())) {
+          if (midlmap == MAPS_RETRY) {
+            __maps_free(leftmap);
+            goto StartOver;
+          }
           if (!__mprotect_chunk(map_addr + left, middle, prot, false)) {
             leftmap->addr = map_addr;
             leftmap->size = left;


@@ -27,41 +27,47 @@
 #include "libc/runtime/internal.h"
 #include "libc/thread/lock.h"
 #include "libc/thread/thread.h"
+#include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"

-static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
-  int me;
-  int backoff = 0;
-  uint64_t word, lock;
-
-  // get current state of lock
-  word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
-
-#if PTHREAD_USE_NSYNC
-  // use fancy nsync mutex if possible
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&       //
-      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
-      _weaken(nsync_mu_lock)) {
-    _weaken(nsync_mu_lock)((nsync_mu *)mutex);
-    return 0;
-  }
-#endif
-
-  // implement barebones normal mutexes
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
-    for (;;) {
-      word = MUTEX_UNLOCK(word);
-      lock = MUTEX_LOCK(word);
-      if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
-                                                memory_order_acquire,
-                                                memory_order_relaxed))
-        return 0;
-      backoff = pthread_delay_np(mutex, backoff);
-    }
-  }
-
-  // implement recursive mutexes
-  me = gettid();
+static void pthread_mutex_lock_naive(pthread_mutex_t *mutex, uint64_t word) {
+  int backoff = 0;
+  uint64_t lock;
+  for (;;) {
+    word = MUTEX_UNLOCK(word);
+    lock = MUTEX_LOCK(word);
+    if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
+                                              memory_order_acquire,
+                                              memory_order_relaxed))
+      return;
+    backoff = pthread_delay_np(mutex, backoff);
+  }
+}
+
+// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
+// slightly improved to attempt acquiring multiple times b4 syscall
+static void pthread_mutex_lock_drepper(atomic_int *futex, char pshare) {
+  int word;
+  for (int i = 0; i < 4; ++i) {
+    word = 0;
+    if (atomic_compare_exchange_strong_explicit(
+            futex, &word, 1, memory_order_acquire, memory_order_acquire))
+      return;
+    pthread_pause_np();
+  }
+  if (word == 1)
+    word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
+  while (word > 0) {
+    _weaken(nsync_futex_wait_)(futex, 2, pshare, 0);
+    word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
+  }
+}
+
+static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
+                                            uint64_t word) {
+  uint64_t lock;
+  int backoff = 0;
+  int me = gettid();
   for (;;) {
     if (MUTEX_OWNER(word) == me) {
       if (MUTEX_TYPE(word) != PTHREAD_MUTEX_ERRORCHECK) {
@@ -91,6 +97,36 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
   }
 }

+static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
+  uint64_t word;
+
+  // get current state of lock
+  word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
+
+#if PTHREAD_USE_NSYNC
+  // use superior mutexes if possible
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&       //
+      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
+      _weaken(nsync_mu_lock)) {
+    _weaken(nsync_mu_lock)((nsync_mu *)mutex);
+    return 0;
+  }
+#endif
+
+  // handle normal mutexes
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
+    if (_weaken(nsync_futex_wait_)) {
+      pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
+    } else {
+      pthread_mutex_lock_naive(mutex, word);
+    }
+    return 0;
+  }
+
+  // handle recursive and error checking mutexes
+  return pthread_mutex_lock_recursive(mutex, word);
+}
+
 /**
  * Locks mutex.
  *


@@ -24,54 +24,33 @@
 #include "libc/runtime/internal.h"
 #include "libc/thread/lock.h"
 #include "libc/thread/thread.h"
+#include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"

-/**
- * Attempts acquiring lock.
- *
- * Unlike pthread_mutex_lock() this function won't block and instead
- * returns an error immediately if the lock couldn't be acquired.
- *
- * @return 0 if lock was acquired, otherwise an errno
- * @raise EAGAIN if maximum number of recursive locks is held
- * @raise EBUSY if lock is currently held in read or write mode
- * @raise EINVAL if `mutex` doesn't refer to an initialized lock
- * @raise EDEADLK if `mutex` is `PTHREAD_MUTEX_ERRORCHECK` and the
- *     current thread already holds this mutex
- */
-errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
-  int me;
-  uint64_t word, lock;
-
-  // get current state of lock
-  word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
-
-#if PTHREAD_USE_NSYNC
-  // delegate to *NSYNC if possible
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&
-      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
-      _weaken(nsync_mu_trylock)) {
-    if (_weaken(nsync_mu_trylock)((nsync_mu *)mutex)) {
-      return 0;
-    } else {
-      return EBUSY;
-    }
-  }
-#endif
-
-  // handle normal mutexes
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
-    word = MUTEX_UNLOCK(word);
-    lock = MUTEX_LOCK(word);
-    if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
-                                              memory_order_acquire,
-                                              memory_order_relaxed))
-      return 0;
-    return EBUSY;
-  }
-
-  // handle recursive and error check mutexes
-  me = gettid();
+static errno_t pthread_mutex_trylock_naive(pthread_mutex_t *mutex,
+                                           uint64_t word) {
+  uint64_t lock;
+  word = MUTEX_UNLOCK(word);
+  lock = MUTEX_LOCK(word);
+  if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
+                                            memory_order_acquire,
+                                            memory_order_relaxed))
+    return 0;
+  return EBUSY;
+}
+
+static errno_t pthread_mutex_trylock_drepper(atomic_int *futex) {
+  int word = 0;
+  if (atomic_compare_exchange_strong_explicit(
+          futex, &word, 1, memory_order_acquire, memory_order_acquire))
+    return 0;
+  return EBUSY;
+}
+
+static errno_t pthread_mutex_trylock_recursive(pthread_mutex_t *mutex,
+                                               uint64_t word) {
+  uint64_t lock;
+  int me = gettid();
   for (;;) {
     if (MUTEX_OWNER(word) == me) {
       if (MUTEX_TYPE(word) != PTHREAD_MUTEX_ERRORCHECK) {
@@ -100,3 +79,47 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
     return EBUSY;
   }
 }
+
+/**
+ * Attempts acquiring lock.
+ *
+ * Unlike pthread_mutex_lock() this function won't block and instead
+ * returns an error immediately if the lock couldn't be acquired.
+ *
+ * @return 0 if lock was acquired, otherwise an errno
+ * @raise EAGAIN if maximum number of recursive locks is held
+ * @raise EBUSY if lock is currently held in read or write mode
+ * @raise EINVAL if `mutex` doesn't refer to an initialized lock
+ * @raise EDEADLK if `mutex` is `PTHREAD_MUTEX_ERRORCHECK` and the
+ *     current thread already holds this mutex
+ */
+errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
+
+  // get current state of lock
+  uint64_t word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
+
+#if PTHREAD_USE_NSYNC
+  // use superior mutexes if possible
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&
+      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
+      _weaken(nsync_mu_trylock)) {
+    if (_weaken(nsync_mu_trylock)((nsync_mu *)mutex)) {
+      return 0;
+    } else {
+      return EBUSY;
+    }
+  }
+#endif
+
+  // handle normal mutexes
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
+    if (_weaken(nsync_futex_wait_)) {
+      return pthread_mutex_trylock_drepper(&mutex->_futex);
+    } else {
+      return pthread_mutex_trylock_naive(mutex, word);
+    }
+  }
+
+  // handle recursive and error checking mutexes
+  return pthread_mutex_trylock_recursive(mutex, word);
+}


@@ -25,45 +25,26 @@
 #include "libc/runtime/internal.h"
 #include "libc/thread/lock.h"
 #include "libc/thread/thread.h"
+#include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"

-/**
- * Releases mutex.
- *
- * This function does nothing in vfork() children.
- *
- * @return 0 on success or error number on failure
- * @raises EPERM if in error check mode and not owned by caller
- * @vforksafe
- */
-errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
-  int me;
-  uint64_t word, lock;
-
-  LOCKTRACE("pthread_mutex_unlock(%t)", mutex);
-
-  // get current state of lock
-  word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
-
-#if PTHREAD_USE_NSYNC
-  // use fancy nsync mutex if possible
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&       //
-      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
-      _weaken(nsync_mu_unlock)) {
-    _weaken(nsync_mu_unlock)((nsync_mu *)mutex);
-    return 0;
-  }
-#endif
-
-  // implement barebones normal mutexes
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
-    lock = MUTEX_UNLOCK(word);
-    atomic_store_explicit(&mutex->_word, lock, memory_order_release);
-    return 0;
-  }
-
-  // implement recursive mutex unlocking
-  me = gettid();
+static void pthread_mutex_unlock_naive(pthread_mutex_t *mutex, uint64_t word) {
+  uint64_t lock = MUTEX_UNLOCK(word);
+  atomic_store_explicit(&mutex->_word, lock, memory_order_release);
+}
+
+// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
+static void pthread_mutex_unlock_drepper(atomic_int *futex, char pshare) {
+  int word = atomic_fetch_sub_explicit(futex, 1, memory_order_release);
+  if (word == 2) {
+    atomic_store_explicit(futex, 0, memory_order_release);
+    _weaken(nsync_futex_wake_)(futex, 1, pshare);
+  }
+}
+
+static errno_t pthread_mutex_unlock_recursive(pthread_mutex_t *mutex,
+                                              uint64_t word) {
+  int me = gettid();
   for (;;) {
     // we allow unlocking an initialized lock that wasn't locked, but we
@@ -88,3 +69,44 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
     return 0;
   }
 }
+
+/**
+ * Releases mutex.
+ *
+ * This function does nothing in vfork() children.
+ *
+ * @return 0 on success or error number on failure
+ * @raises EPERM if in error check mode and not owned by caller
+ * @vforksafe
+ */
+errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
+  uint64_t word;
+
+  LOCKTRACE("pthread_mutex_unlock(%t)", mutex);
+
+  // get current state of lock
+  word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
+
+#if PTHREAD_USE_NSYNC
+  // use superior mutexes if possible
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&       //
+      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
+      _weaken(nsync_mu_unlock)) {
+    _weaken(nsync_mu_unlock)((nsync_mu *)mutex);
+    return 0;
+  }
+#endif
+
+  // implement barebones normal mutexes
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
+    if (_weaken(nsync_futex_wake_)) {
+      pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
+    } else {
+      pthread_mutex_unlock_naive(mutex, word);
+    }
+    return 0;
+  }
+
+  // handle recursive and error checking mutexes
+  return pthread_mutex_unlock_recursive(mutex, word);
+}


@@ -32,7 +32,7 @@ void sys_sched_yield(void);
 int pthread_yield_np(void) {
   if (IsXnuSilicon()) {
     __syslib->__pthread_yield_np();
-  } else if (IsOpenbsd() || IsNetbsd()) {
+  } else if (IsOpenbsd()) {
     // sched_yield() is punishingly slow on OpenBSD
     // it's ruinously slow it'll destroy everything
    pthread_pause_np();


@@ -16,9 +16,10 @@
    TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
    PERFORMANCE OF THIS SOFTWARE.
 */
+#include "libc/errno.h"
+#include "libc/intrin/atomic.h"
 #include "libc/str/str.h"
 #include "libc/thread/thread.h"
-#include "third_party/nsync/counter.h"

 /**
  * Destroys barrier.
@@ -27,9 +28,8 @@
  * @raise EINVAL if threads are still inside the barrier
  */
 errno_t pthread_barrier_destroy(pthread_barrier_t *barrier) {
-  if (barrier->_nsync) {
-    nsync_counter_free(barrier->_nsync);
-    barrier->_nsync = 0;
-  }
+  if (atomic_load_explicit(&barrier->_waiters, memory_order_relaxed))
+    return EINVAL;
+  memset(barrier, -1, sizeof(*barrier));
   return 0;
 }


@@ -17,8 +17,9 @@
    PERFORMANCE OF THIS SOFTWARE.
 */
 #include "libc/errno.h"
+#include "libc/intrin/atomic.h"
+#include "libc/limits.h"
 #include "libc/thread/thread.h"
-#include "third_party/nsync/counter.h"

 /**
  * Initializes barrier.
@@ -28,16 +29,17 @@
  *     before the barrier is released, which must be greater than zero
  * @return 0 on success, or error number on failure
  * @raise EINVAL if `count` isn't greater than zero
- * @raise ENOMEM if insufficient memory exists
  */
 errno_t pthread_barrier_init(pthread_barrier_t *barrier,
                              const pthread_barrierattr_t *attr,
                              unsigned count) {
-  nsync_counter c;
   if (!count)
     return EINVAL;
-  if (!(c = nsync_counter_new(count)))
-    return ENOMEM;
-  *barrier = (pthread_barrier_t){._nsync = c};
+  if (count > INT_MAX)
+    return EINVAL;
+  barrier->_count = count;
+  barrier->_pshared = attr ? *attr : PTHREAD_PROCESS_PRIVATE;
+  atomic_store_explicit(&barrier->_counter, count, memory_order_relaxed);
+  atomic_store_explicit(&barrier->_waiters, 0, memory_order_relaxed);
   return 0;
 }


@@ -16,25 +16,53 @@
    TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
    PERFORMANCE OF THIS SOFTWARE.
 */
+#include "libc/calls/blockcancel.internal.h"
+#include "libc/errno.h"
+#include "libc/intrin/atomic.h"
+#include "libc/limits.h"
 #include "libc/thread/thread.h"
-#include "third_party/nsync/counter.h"
+#include "third_party/nsync/futex.internal.h"

 /**
  * Waits for all threads to arrive at barrier.
  *
  * When the barrier is broken, the state becomes reset to what it was
  * when pthread_barrier_init() was called, so that the barrier may be
- * used again in the same way. The last thread to arrive shall be the
- * last to leave and it returns a magic value.
+ * used again in the same way.
+ *
+ * Unlike pthread_cond_timedwait() this function is not a cancelation
+ * point. It is not needed to have cleanup handlers on block cancels.
  *
  * @return 0 on success, `PTHREAD_BARRIER_SERIAL_THREAD` to one lucky
  *     thread which was the last arrival, or an errno on error
+ * @raise EINVAL if barrier is used incorrectly
  */
 errno_t pthread_barrier_wait(pthread_barrier_t *barrier) {
-  if (nsync_counter_add(barrier->_nsync, -1)) {
-    nsync_counter_wait(barrier->_nsync, nsync_time_no_deadline);
-    return 0;
-  } else {
-    return PTHREAD_BARRIER_SERIAL_THREAD;
-  }
+  int n;
+
+  // enter barrier
+  atomic_fetch_add_explicit(&barrier->_waiters, 1, memory_order_acq_rel);
+  n = atomic_fetch_sub_explicit(&barrier->_counter, 1, memory_order_acq_rel);
+  n = n - 1;
+
+  // this can only happen on invalid usage
+  if (n < 0)
+    return EINVAL;
+
+  // reset count and wake waiters if we're last at barrier
+  if (!n) {
+    atomic_store_explicit(&barrier->_counter, barrier->_count,
+                          memory_order_release);
+    atomic_store_explicit(&barrier->_waiters, 0, memory_order_release);
+    nsync_futex_wake_(&barrier->_waiters, INT_MAX, barrier->_pshared);
+    return PTHREAD_BARRIER_SERIAL_THREAD;
+  }
+
+  // wait for everyone else to arrive at barrier
+  BLOCK_CANCELATION;
+  while ((n = atomic_load_explicit(&barrier->_waiters, memory_order_acquire)))
+    nsync_futex_wait_(&barrier->_waiters, n, barrier->_pshared, 0);
+  ALLOW_CANCELATION;
+  return 0;
 }
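
Because barriers now honor PTHREAD_PROCESS_SHARED, a barrier placed in shared
memory can rendezvous separate processes, not just threads. A minimal sketch of
that usage, written against plain POSIX calls (an illustration, not taken from
this commit's tests):

// Sketch: parent and fork()'d child meet at a barrier in MAP_SHARED memory.
#include <pthread.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void) {
  pthread_barrier_t *b = mmap(0, sizeof(*b), PROT_READ | PROT_WRITE,
                              MAP_SHARED | MAP_ANONYMOUS, -1, 0);
  pthread_barrierattr_t attr;
  pthread_barrierattr_init(&attr);
  pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
  pthread_barrier_init(b, &attr, 2);   // two arrivals: parent + child
  pthread_barrierattr_destroy(&attr);
  if (!fork()) {
    pthread_barrier_wait(b);           // child arrives
    _exit(0);
  }
  int rc = pthread_barrier_wait(b);    // parent arrives; both are released
  if (rc == PTHREAD_BARRIER_SERIAL_THREAD)
    puts("parent was the last arrival");
  wait(0);
  pthread_barrier_destroy(b);
  munmap(b, sizeof(*b));
}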


@@ -23,7 +23,7 @@
  *
  * @param pshared is set to one of the following
  *     - `PTHREAD_PROCESS_PRIVATE` (default)
- *     - `PTHREAD_PROCESS_SHARED` (unsupported)
+ *     - `PTHREAD_PROCESS_SHARED`
  * @return 0 on success, or error on failure
  */
 errno_t pthread_barrierattr_getpshared(const pthread_barrierattr_t *attr,


@@ -24,6 +24,6 @@
  * @return 0 on success, or error on failure
  */
 errno_t pthread_barrierattr_init(pthread_barrierattr_t *attr) {
-  *attr = 0;
+  *attr = PTHREAD_PROCESS_PRIVATE;
   return 0;
 }


@@ -24,13 +24,14 @@
  *
  * @param pshared can be one of
  *     - `PTHREAD_PROCESS_PRIVATE` (default)
- *     - `PTHREAD_PROCESS_SHARED` (unsupported)
+ *     - `PTHREAD_PROCESS_SHARED`
  * @return 0 on success, or error on failure
  * @raises EINVAL if `pshared` is invalid
  */
 errno_t pthread_barrierattr_setpshared(pthread_barrierattr_t *attr,
                                        int pshared) {
   switch (pshared) {
+    case PTHREAD_PROCESS_SHARED:
     case PTHREAD_PROCESS_PRIVATE:
       *attr = pshared;
       return 0;


@@ -46,7 +46,7 @@ COSMOPOLITAN_C_START_
 #define PTHREAD_RWLOCK_INITIALIZER {0}
 #define PTHREAD_MUTEX_INITIALIZER {0}
-#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, 0, PTHREAD_MUTEX_RECURSIVE}
+#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, {}, PTHREAD_MUTEX_RECURSIVE}

 typedef uintptr_t pthread_t;
 typedef int pthread_id_np_t;
@@ -66,7 +66,10 @@ typedef struct pthread_spinlock_s {
 typedef struct pthread_mutex_s {
   uint32_t _nsync;
-  int32_t _pid;
+  union {
+    int32_t _pid;
+    _Atomic(int32_t) _futex;
+  };
   _Atomic(uint64_t) _word;
 } pthread_mutex_t;
@@ -92,7 +95,10 @@ typedef struct pthread_rwlock_s {
 } pthread_rwlock_t;

 typedef struct pthread_barrier_s {
-  void *_nsync;
+  int _count;
+  char _pshared;
+  _Atomic(int) _counter;
+  _Atomic(int) _waiters;
 } pthread_barrier_t;

 typedef struct pthread_attr_s {


@@ -0,0 +1,236 @@
#include <assert.h>
#include <cosmo.h>
#include <linux/futex.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>
#include "third_party/nsync/futex.internal.h"
// THIS IS AN EXAMPLE OF HOW TO USE COSMOPOLITAN FUTEXES TO IMPLEMENT
// YOUR OWN MUTEXES FROM SCRATCH. LOOK AT HOW MUCH BETTER THIS CAN
// MAKE THINGS COMPARED TO SPIN LOCKS. ALGORITHM FROM ULRICH DREPPER.
// arm fleet
// with futexes
// 30 threads / 100000 iterations
//
// 242,604 us real
// 4,222,946 us user
// 1,079,229 us sys
// footek_test on studio.test. 630 µs 17'415 µs 256'782 µs
// 1,362,557 us real
// 3,232,978 us user
// 2,104,824 us sys
// footek_test on pi.test. 611 µs 21'708 µs 1'385'129 µs
// 1,346,482 us real
// 3,370,513 us user
// 1,992,383 us sys
// footek_test on freebsdarm.test. 427 µs 19'967 µs 1'393'476 µs
// arm fleet
// without futexes
// 30 threads / 100000 iterations
//
// 1,282,084 us real
// 29,359,582 us user
// 34,553 us sys
// footek_test on studio.test. 961 µs 12'907 µs 1'287'983 µs
// 4,070,988 us real
// 16,203,990 us user
// 7,999 us sys
// footek_test on pi.test. 459 µs 16'376 µs 4'095'512 µs
// 7,012,493 us real
// 27,936,725 us user
// 7,871 us sys
// footek_test on freebsdarm.test. 502 µs 16'446 µs 7'051'545 µs
// x86 fleet
// with futexes
// 30 threads / 100000 iterations
//
// 146,015 us real
// 169,427 us user
// 68,939 us sys
// footek_test on rhel7.test. 376 µs 2'259 µs 153'024 µs
// 144,917 us real
// 383,317 us user
// 191,203 us sys
// footek_test on xnu.test. 11'143 µs 9'159 µs 164'865 µs
// 244,286 us real
// 405,395 us user
// 956,122 us sys
// footek_test on freebsd.test. 394 µs 2'165 µs 256'227 µs
// 209,095 us real
// 616,634 us user
// 9,945 us sys
// footek_test on netbsd.test. 502 µs 2'020 µs 261'895 µs
// 344,876 us real
// 50,000 us user
// 1,240,000 us sys
// footek_test on openbsd.test. 457 µs 2'737 µs 396'342 µs
// 1,193,906 us real
// 17,546,875 us user
// 3,000,000 us sys
// footek_test on win10.test. 462 µs 59'528 µs 1'348'265 µs
// x86 fleet
// without futexes
// 30 threads / 100000 iterations
//
// 897,815 us real
// 1,763,705 us user
// 9,696 us sys
// footek_test on rhel7.test. 423 µs 2'638 µs 912'241 µs
// 790,332 us real
// 2,359,967 us user
// 0 us sys
// footek_test on netbsd.test. 1'151 µs 2'634 µs 1'014'867 µs
// 2,332,724 us real
// 9,150,000 us user
// 10,000 us sys
// footek_test on openbsd.test. 557 µs 3'020 µs 2'554'648 µs
// 2,528,863 us real
// 56,546,875 us user
// 1,671,875 us sys
// footek_test on win10.test. 962 µs 9'698 µs 2'751'905 µs
// 2,916,033 us real
// 17,236,103 us user
// 0 us sys
// footek_test on freebsd.test. 690 µs 3'011 µs 2'925'997 µs
// 4,225,726 us real
// 16,679,456 us user
// 16,265 us sys
// footek_test on xnu.test. 98'468 µs 5'242 µs 5'191'724 µs
#define USE_FUTEX 1
#define THREADS 30
#define ITERATIONS 30000
#define MUTEX_LOCKED(word) ((word) & 8)
#define MUTEX_WAITING(word) ((word) & 16)
#define MUTEX_LOCK(word) ((word) | 8)
#define MUTEX_SET_WAITING(word) ((word) | 16)
#define MUTEX_UNLOCK(word) ((word) & ~(8 | 16))
void lock(atomic_int *futex) {
  int word, cs;
  for (int i = 0; i < 4; ++i) {
    word = 0;
    if (atomic_compare_exchange_strong_explicit(
            futex, &word, 1, memory_order_acquire, memory_order_acquire))
      return;
    pthread_pause_np();
  }
  if (word == 1)
    word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
  while (word > 0) {
    pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
#if USE_FUTEX
    nsync_futex_wait_(futex, 2, 0, 0);
#endif
    pthread_setcancelstate(cs, 0);
    word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
  }
}

void unlock(atomic_int *futex) {
  int word = atomic_fetch_sub_explicit(futex, 1, memory_order_release);
  if (word == 2) {
    atomic_store_explicit(futex, 0, memory_order_release);
#if USE_FUTEX
    nsync_futex_wake_(futex, 1, 0);
#endif
  }
}

int g_chores;
atomic_int g_lock;
pthread_mutex_t g_locker;

void *worker(void *arg) {
  for (int i = 0; i < ITERATIONS; ++i) {
    lock(&g_lock);
    ++g_chores;
    unlock(&g_lock);
  }
  return 0;
}

int main() {
  struct timeval start;
  gettimeofday(&start, 0);

  pthread_t th[THREADS];
  for (int i = 0; i < THREADS; ++i)
    pthread_create(&th[i], 0, worker, 0);
  for (int i = 0; i < THREADS; ++i)
    pthread_join(th[i], 0);
  npassert(g_chores == THREADS * ITERATIONS);

  struct rusage ru;
  struct timeval end;
  gettimeofday(&end, 0);
  getrusage(RUSAGE_SELF, &ru);
  printf("%,16ld us real\n"
         "%,16ld us user\n"
         "%,16ld us sys\n",
         timeval_tomicros(timeval_sub(end, start)),  //
         timeval_tomicros(ru.ru_utime),              //
         timeval_tomicros(ru.ru_stime));

  CheckForMemoryLeaks();
}
// COMPARE ULRICH DREPPER'S LOCKING ALGORITHM WITH MIKE BURROWS *NSYNC
// WHICH IS WHAT COSMOPOLITAN LIBC USES FOR YOUR POSIX THREADS MUTEXES
// x86 fleet
// with pthread_mutex_t
// 30 threads / 100000 iterations
//
// 186,976 us real
// 43,609 us user
// 205,585 us sys
// footek_test on freebsd.test. 410 µs 2'054 µs 195'339 µs
// 238,902 us real
// 235,743 us user
// 97,881 us sys
// footek_test on rhel7.test. 343 µs 2'339 µs 246'926 µs
// 201,285 us real
// 249,612 us user
// 141,230 us sys
// footek_test on xnu.test. 1'960 µs 5'350 µs 265'758 µs
// 303,363 us real
// 60,000 us user
// 410,000 us sys
// footek_test on openbsd.test. 545 µs 3'023 µs 326'200 µs
// 386,085 us real
// 586,455 us user
// 466,991 us sys
// footek_test on netbsd.test. 344 µs 2'421 µs 413'440 µs
// 245,010 us real
// 437,500 us user
// 140,625 us sys
// footek_test on win10.test. 300 µs 18'574 µs 441'225 µs
// arm fleet
// with pthread_mutex_t
// 30 threads / 100000 iterations
//
// 87,132 us real
// 183,517 us user
// 20,020 us sys
// footek_test on studio.test. 560 µs 12'418 µs 92'825 µs
// 679,374 us real
// 957,678 us user
// 605,078 us sys
// footek_test on pi.test. 462 µs 16'574 µs 702'833 µs
// 902,343 us real
// 1,459,706 us user
// 781,140 us sys
// footek_test on freebsdarm.test. 400 µs 16'261 µs 970'022 µs


@@ -37,6 +37,7 @@
 #include "third_party/nsync/atomic.internal.h"
 #include "third_party/nsync/common.internal.h"
 #include "third_party/nsync/mu_semaphore.h"
+#include "third_party/nsync/mu_semaphore.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
 __static_yoink("nsync_notice");
@@ -147,9 +148,9 @@ static void free_waiters_push (waiter *w) {
 static void free_waiters_populate (void) {
        int n;
-       if (IsNetbsd () || IsXnuSilicon ()) {
-               // netbsd needs one file descriptor per semaphore (!!)
-               // tim cook wants us to use his grand central dispatch
+       if (IsNetbsd () || (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ())) {
+               // netbsd needs a real file descriptor per semaphore
+               // tim cook wants us to use his lol central dispatch
                n = 1;
        } else {
                n = getpagesize() / sizeof(waiter);


@@ -52,6 +52,7 @@
 #include "third_party/nsync/atomic.h"
 #include "third_party/nsync/common.internal.h"
 #include "third_party/nsync/futex.internal.h"
+#include "libc/intrin/kprintf.h"
 #include "third_party/nsync/time.h"
 #define FUTEX_WAIT_BITS_ FUTEX_BITSET_MATCH_ANY
@@ -138,7 +139,7 @@ static int nsync_futex_polyfill_ (atomic_int *w, int expect, struct timespec *ab
        }
        if (_weaken (pthread_testcancel_np) &&
            _weaken (pthread_testcancel_np) ()) {
-               return -ETIMEDOUT;
+               return -ECANCELED;
        }
        if (abstime && timespec_cmp (timespec_real (), *abstime) >= 0) {
                return -ETIMEDOUT;
@@ -163,7 +164,7 @@ static int nsync_futex_wait_win32_ (atomic_int *w, int expect, char pshare,
        for (;;) {
                now = timespec_real ();
-               if (timespec_cmp (now, deadline) > 0) {
+               if (timespec_cmp (now, deadline) >= 0) {
                        return etimedout();
                }
                wait = timespec_sub (deadline, now);


@@ -21,14 +21,9 @@
 #include "third_party/nsync/mu_semaphore.internal.h"
 __static_yoink("nsync_notice");

-/* Apple's ulock (part by Cosmo futexes) is an internal API, but:
-   1. Unlike GCD it's cancellable, i.e. can be EINTR'd by signals
-   2. We currently always use ulock anyway for joining threads */
-#define PREFER_GCD_OVER_ULOCK 1
-
 /* Initialize *s; the initial value is 0. */
 bool nsync_mu_semaphore_init (nsync_semaphore *s) {
-       if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
+       if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
                return nsync_mu_semaphore_init_gcd (s);
        } else if (IsNetbsd ()) {
                return nsync_mu_semaphore_init_sem (s);
@@ -44,7 +39,7 @@ bool nsync_mu_semaphore_init (nsync_semaphore *s) {
 errno_t nsync_mu_semaphore_p (nsync_semaphore *s) {
        errno_t err;
        BEGIN_CANCELATION_POINT;
-       if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
+       if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
                err = nsync_mu_semaphore_p_gcd (s);
        } else if (IsNetbsd ()) {
                err = nsync_mu_semaphore_p_sem (s);
@@ -62,7 +57,7 @@ errno_t nsync_mu_semaphore_p (nsync_semaphore *s) {
 errno_t nsync_mu_semaphore_p_with_deadline (nsync_semaphore *s, nsync_time abs_deadline) {
        errno_t err;
        BEGIN_CANCELATION_POINT;
-       if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
+       if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
                err = nsync_mu_semaphore_p_with_deadline_gcd (s, abs_deadline);
        } else if (IsNetbsd ()) {
                err = nsync_mu_semaphore_p_with_deadline_sem (s, abs_deadline);
@@ -75,7 +70,7 @@ errno_t nsync_mu_semaphore_p_with_deadline (nsync_semaphore *s, nsync_time abs_d
 /* Ensure that the count of *s is at least 1. */
 void nsync_mu_semaphore_v (nsync_semaphore *s) {
-       if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
+       if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
                return nsync_mu_semaphore_v_gcd (s);
        } else if (IsNetbsd ()) {
                return nsync_mu_semaphore_v_sem (s);


@@ -4,6 +4,20 @@
 #include "third_party/nsync/time.h"
 COSMOPOLITAN_C_START_

+/* XNU ulock (used by cosmo futexes) is an internal API, however:
+   1. Unlike GCD it's cancelable i.e. can be EINTR'd by signals
+   2. We have no choice but to use ulock for joining threads
+   3. Grand Central Dispatch requires a busy loop workaround
+   4. ulock makes our mutexes use 20% more system time (meh)
+   5. ulock makes our mutexes use 40% less wall time (good)
+   6. ulock makes our mutexes use 64% less user time (woop)
+   ulock is an outstanding system call that must be used.
+   gcd is not an acceptable alternative to ulock. */
+#define NSYNC_USE_GRAND_CENTRAL 0
+
 bool nsync_mu_semaphore_init_futex(nsync_semaphore *);
 errno_t nsync_mu_semaphore_p_futex(nsync_semaphore *);
 errno_t nsync_mu_semaphore_p_with_deadline_futex(nsync_semaphore *, nsync_time);