mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
Make more improvements to threads and mappings
- NetBSD should now have faster synchronization - POSIX barriers may now be shared across processes - An edge case with memory map tracking has been fixed - Grand Central Dispatch is no longer used on MacOS ARM64 - POSIX mutexes in normal mode now use futexes across processes
This commit is contained in:
parent
2187d6d2dd
commit
e398f3887c
20 changed files with 566 additions and 171 deletions
|
@ -26,11 +26,9 @@
|
|||
|
||||
int begin_cancelation_point(void) {
|
||||
int state = 0;
|
||||
struct CosmoTib *tib;
|
||||
struct PosixThread *pt;
|
||||
if (__tls_enabled) {
|
||||
tib = __get_tls();
|
||||
if ((pt = (struct PosixThread *)tib->tib_pthread)) {
|
||||
struct PosixThread *pt;
|
||||
if ((pt = _pthread_self())) {
|
||||
state = pt->pt_flags & PT_INCANCEL;
|
||||
pt->pt_flags |= PT_INCANCEL;
|
||||
}
|
||||
|
@ -39,11 +37,9 @@ int begin_cancelation_point(void) {
|
|||
}
|
||||
|
||||
void end_cancelation_point(int state) {
|
||||
struct CosmoTib *tib;
|
||||
struct PosixThread *pt;
|
||||
if (__tls_enabled) {
|
||||
tib = __get_tls();
|
||||
if ((pt = (struct PosixThread *)tib->tib_pthread)) {
|
||||
struct PosixThread *pt;
|
||||
if ((pt = _pthread_self())) {
|
||||
pt->pt_flags &= ~PT_INCANCEL;
|
||||
pt->pt_flags |= state;
|
||||
}
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include "libc/thread/tls2.internal.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define MAPS_RETRY ((void *)-1)
|
||||
|
||||
#define MAP_TREE_CONTAINER(e) TREE_CONTAINER(struct Map, tree, e)
|
||||
|
||||
struct Map {
|
||||
|
|
|
@ -120,6 +120,7 @@ static int __muntrack(char *addr, size_t size, int pagesz,
|
|||
struct Map *map;
|
||||
struct Map *next;
|
||||
struct Map *floor;
|
||||
StartOver:
|
||||
floor = __maps_floor(addr);
|
||||
for (map = floor; map && map->addr <= addr + size; map = next) {
|
||||
next = __maps_next(map);
|
||||
|
@ -148,6 +149,8 @@ static int __muntrack(char *addr, size_t size, int pagesz,
|
|||
ASSERT(left > 0);
|
||||
struct Map *leftmap;
|
||||
if ((leftmap = __maps_alloc())) {
|
||||
if (leftmap == MAPS_RETRY)
|
||||
goto StartOver;
|
||||
map->addr += left;
|
||||
map->size = right;
|
||||
if (!(map->flags & MAP_ANONYMOUS))
|
||||
|
@ -167,6 +170,8 @@ static int __muntrack(char *addr, size_t size, int pagesz,
|
|||
size_t right = map_addr + map_size - addr;
|
||||
struct Map *rightmap;
|
||||
if ((rightmap = __maps_alloc())) {
|
||||
if (rightmap == MAPS_RETRY)
|
||||
goto StartOver;
|
||||
map->size = left;
|
||||
__maps.pages -= (right + pagesz - 1) / pagesz;
|
||||
rightmap->addr = addr;
|
||||
|
@ -184,8 +189,14 @@ static int __muntrack(char *addr, size_t size, int pagesz,
|
|||
size_t right = map_size - middle - left;
|
||||
struct Map *leftmap;
|
||||
if ((leftmap = __maps_alloc())) {
|
||||
if (leftmap == MAPS_RETRY)
|
||||
goto StartOver;
|
||||
struct Map *middlemap;
|
||||
if ((middlemap = __maps_alloc())) {
|
||||
if (middlemap == MAPS_RETRY) {
|
||||
__maps_free(leftmap);
|
||||
goto StartOver;
|
||||
}
|
||||
leftmap->addr = map_addr;
|
||||
leftmap->size = left;
|
||||
leftmap->off = map->off;
|
||||
|
@ -204,6 +215,7 @@ static int __muntrack(char *addr, size_t size, int pagesz,
|
|||
*deleted = middlemap;
|
||||
__maps_check();
|
||||
} else {
|
||||
__maps_free(leftmap);
|
||||
rc = -1;
|
||||
}
|
||||
} else {
|
||||
|
@ -304,12 +316,11 @@ struct Map *__maps_alloc(void) {
|
|||
map->flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK;
|
||||
map->hand = sys.maphandle;
|
||||
__maps_lock();
|
||||
__maps_insert(map++);
|
||||
__maps_insert(map);
|
||||
__maps_unlock();
|
||||
map->addr = MAP_FAILED;
|
||||
for (int i = 1; i < gransz / sizeof(struct Map) - 1; ++i)
|
||||
for (int i = 1; i < gransz / sizeof(struct Map); ++i)
|
||||
__maps_free(map + i);
|
||||
return map;
|
||||
return MAPS_RETRY;
|
||||
}
|
||||
|
||||
static int __munmap(char *addr, size_t size) {
|
||||
|
@ -396,21 +407,32 @@ void *__maps_pickaddr(size_t size) {
|
|||
static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
|
||||
int64_t off, int pagesz, int gransz) {
|
||||
|
||||
// allocate Map object
|
||||
struct Map *map;
|
||||
do {
|
||||
if (!(map = __maps_alloc()))
|
||||
return MAP_FAILED;
|
||||
} while (map == MAPS_RETRY);
|
||||
|
||||
// polyfill nuances of fixed mappings
|
||||
int sysflags = flags;
|
||||
bool noreplace = false;
|
||||
bool should_untrack = false;
|
||||
if (flags & MAP_FIXED_NOREPLACE) {
|
||||
if (flags & MAP_FIXED)
|
||||
if (flags & MAP_FIXED) {
|
||||
__maps_free(map);
|
||||
return (void *)einval();
|
||||
}
|
||||
sysflags &= ~MAP_FIXED_NOREPLACE;
|
||||
if (IsLinux()) {
|
||||
noreplace = true;
|
||||
sysflags |= MAP_FIXED_NOREPLACE_linux;
|
||||
} else if (IsFreebsd() || IsNetbsd()) {
|
||||
sysflags |= MAP_FIXED;
|
||||
if (__maps_overlaps(addr, size, pagesz))
|
||||
if (__maps_overlaps(addr, size, pagesz)) {
|
||||
__maps_free(map);
|
||||
return (void *)eexist();
|
||||
}
|
||||
} else {
|
||||
noreplace = true;
|
||||
}
|
||||
|
@ -418,11 +440,6 @@ static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
|
|||
should_untrack = true;
|
||||
}
|
||||
|
||||
// allocate Map object
|
||||
struct Map *map;
|
||||
if (!(map = __maps_alloc()))
|
||||
return MAP_FAILED;
|
||||
|
||||
// remove mapping we blew away
|
||||
if (IsWindows() && should_untrack)
|
||||
__munmap(addr, size);
|
||||
|
@ -572,23 +589,27 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
|
|||
return (void *)einval();
|
||||
}
|
||||
|
||||
// allocate object for tracking new mapping
|
||||
struct Map *map;
|
||||
do {
|
||||
if (!(map = __maps_alloc()))
|
||||
return (void *)enomem();
|
||||
} while (map == MAPS_RETRY);
|
||||
|
||||
// check old interval is fully contained within one mapping
|
||||
struct Map *old_map;
|
||||
if (!(old_map = __maps_floor(old_addr)) ||
|
||||
old_addr + old_size > old_map->addr + PGUP(old_map->size) ||
|
||||
old_addr < old_map->addr)
|
||||
old_addr < old_map->addr) {
|
||||
__maps_free(map);
|
||||
return (void *)efault();
|
||||
}
|
||||
|
||||
// save old properties
|
||||
int old_off = old_map->off;
|
||||
int old_prot = old_map->prot;
|
||||
int old_flags = old_map->flags;
|
||||
|
||||
// allocate object for tracking new mapping
|
||||
struct Map *map;
|
||||
if (!(map = __maps_alloc()))
|
||||
return (void *)enomem();
|
||||
|
||||
// netbsd mremap fixed returns enoent rather than unmapping old pages
|
||||
if (IsNetbsd() && (flags & MREMAP_FIXED))
|
||||
if (__munmap(new_addr, new_size)) {
|
||||
|
|
|
@ -75,6 +75,7 @@ int __mprotect(char *addr, size_t size, int prot) {
|
|||
return edeadlk();
|
||||
}
|
||||
struct Map *map, *floor;
|
||||
StartOver:
|
||||
floor = __maps_floor(addr);
|
||||
for (map = floor; map && map->addr <= addr + size; map = __maps_next(map)) {
|
||||
char *map_addr = map->addr;
|
||||
|
@ -93,10 +94,12 @@ int __mprotect(char *addr, size_t size, int prot) {
|
|||
}
|
||||
} else if (addr <= map_addr) {
|
||||
// change lefthand side of mapping
|
||||
size_t left = PGUP(addr + size - map_addr);
|
||||
size_t left = addr + size - map_addr;
|
||||
size_t right = map_size - left;
|
||||
struct Map *leftmap;
|
||||
if ((leftmap = __maps_alloc())) {
|
||||
if (leftmap == MAPS_RETRY)
|
||||
goto StartOver;
|
||||
if (!__mprotect_chunk(map_addr, left, prot, false)) {
|
||||
leftmap->addr = map_addr;
|
||||
leftmap->size = left;
|
||||
|
@ -127,6 +130,8 @@ int __mprotect(char *addr, size_t size, int prot) {
|
|||
size_t right = map_addr + map_size - addr;
|
||||
struct Map *leftmap;
|
||||
if ((leftmap = __maps_alloc())) {
|
||||
if (leftmap == MAPS_RETRY)
|
||||
goto StartOver;
|
||||
if (!__mprotect_chunk(map_addr + left, right, prot, false)) {
|
||||
leftmap->addr = map_addr;
|
||||
leftmap->size = left;
|
||||
|
@ -159,8 +164,14 @@ int __mprotect(char *addr, size_t size, int prot) {
|
|||
size_t right = map_size - middle - left;
|
||||
struct Map *leftmap;
|
||||
if ((leftmap = __maps_alloc())) {
|
||||
if (leftmap == MAPS_RETRY)
|
||||
goto StartOver;
|
||||
struct Map *midlmap;
|
||||
if ((midlmap = __maps_alloc())) {
|
||||
if (midlmap == MAPS_RETRY) {
|
||||
__maps_free(leftmap);
|
||||
goto StartOver;
|
||||
}
|
||||
if (!__mprotect_chunk(map_addr + left, middle, prot, false)) {
|
||||
leftmap->addr = map_addr;
|
||||
leftmap->size = left;
|
||||
|
|
|
@ -27,41 +27,47 @@
|
|||
#include "libc/runtime/internal.h"
|
||||
#include "libc/thread/lock.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "third_party/nsync/futex.internal.h"
|
||||
#include "third_party/nsync/mu.h"
|
||||
|
||||
static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
|
||||
int me;
|
||||
static void pthread_mutex_lock_naive(pthread_mutex_t *mutex, uint64_t word) {
|
||||
int backoff = 0;
|
||||
uint64_t word, lock;
|
||||
|
||||
// get current state of lock
|
||||
word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
|
||||
|
||||
#if PTHREAD_USE_NSYNC
|
||||
// use fancy nsync mutex if possible
|
||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL && //
|
||||
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
||||
_weaken(nsync_mu_lock)) {
|
||||
_weaken(nsync_mu_lock)((nsync_mu *)mutex);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// implement barebones normal mutexes
|
||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
|
||||
uint64_t lock;
|
||||
for (;;) {
|
||||
word = MUTEX_UNLOCK(word);
|
||||
lock = MUTEX_LOCK(word);
|
||||
if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
|
||||
memory_order_acquire,
|
||||
memory_order_relaxed))
|
||||
return 0;
|
||||
return;
|
||||
backoff = pthread_delay_np(mutex, backoff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// implement recursive mutexes
|
||||
me = gettid();
|
||||
// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
|
||||
// slightly improved to attempt acquiring multiple times b4 syscall
|
||||
static void pthread_mutex_lock_drepper(atomic_int *futex, char pshare) {
|
||||
int word;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
word = 0;
|
||||
if (atomic_compare_exchange_strong_explicit(
|
||||
futex, &word, 1, memory_order_acquire, memory_order_acquire))
|
||||
return;
|
||||
pthread_pause_np();
|
||||
}
|
||||
if (word == 1)
|
||||
word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
|
||||
while (word > 0) {
|
||||
_weaken(nsync_futex_wait_)(futex, 2, pshare, 0);
|
||||
word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
|
||||
}
|
||||
}
|
||||
|
||||
static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
|
||||
uint64_t word) {
|
||||
uint64_t lock;
|
||||
int backoff = 0;
|
||||
int me = gettid();
|
||||
for (;;) {
|
||||
if (MUTEX_OWNER(word) == me) {
|
||||
if (MUTEX_TYPE(word) != PTHREAD_MUTEX_ERRORCHECK) {
|
||||
|
@ -91,6 +97,36 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Picks the locking strategy for `mutex` and acquires it.
 *
 * The mutex word is read once (relaxed) and the type and sharing mode
 * encoded in it select the implementation:
 *
 * 1. anything other than `PTHREAD_MUTEX_NORMAL` is handled by
 *    pthread_mutex_lock_recursive()
 * 2. process-private normal mutexes use `nsync_mu_lock` when built with
 *    `PTHREAD_USE_NSYNC` and that symbol is linked
 * 3. remaining normal mutexes use the futex algorithm when
 *    `nsync_futex_wait_` is linked, otherwise the naive
 *    compare-and-swap loop
 *
 * @return 0 for normal mutexes, otherwise whatever
 *     pthread_mutex_lock_recursive() returns
 */
static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {

  // snapshot the current state of the lock
  uint64_t state = atomic_load_explicit(&mutex->_word, memory_order_relaxed);

  // recursive and error checking mutexes have their own path
  if (MUTEX_TYPE(state) != PTHREAD_MUTEX_NORMAL)
    return pthread_mutex_lock_recursive(mutex, state);

#if PTHREAD_USE_NSYNC
  // prefer *NSYNC when the mutex never leaves this process
  if (MUTEX_PSHARED(state) == PTHREAD_PROCESS_PRIVATE &&
      _weaken(nsync_mu_lock)) {
    _weaken(nsync_mu_lock)((nsync_mu *)mutex);
    return 0;
  }
#endif

  // normal mutex: futexes if they got linked, otherwise spin
  if (_weaken(nsync_futex_wait_))
    pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(state));
  else
    pthread_mutex_lock_naive(mutex, state);
  return 0;
}
|
||||
|
||||
/**
|
||||
* Locks mutex.
|
||||
*
|
||||
|
|
|
@ -24,43 +24,12 @@
|
|||
#include "libc/runtime/internal.h"
|
||||
#include "libc/thread/lock.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "third_party/nsync/futex.internal.h"
|
||||
#include "third_party/nsync/mu.h"
|
||||
|
||||
/**
|
||||
* Attempts acquiring lock.
|
||||
*
|
||||
* Unlike pthread_mutex_lock() this function won't block and instead
|
||||
* returns an error immediately if the lock couldn't be acquired.
|
||||
*
|
||||
* @return 0 if lock was acquired, otherwise an errno
|
||||
* @raise EAGAIN if maximum number of recursive locks is held
|
||||
* @raise EBUSY if lock is currently held in read or write mode
|
||||
* @raise EINVAL if `mutex` doesn't refer to an initialized lock
|
||||
* @raise EDEADLK if `mutex` is `PTHREAD_MUTEX_ERRORCHECK` and the
|
||||
* current thread already holds this mutex
|
||||
*/
|
||||
errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
|
||||
int me;
|
||||
uint64_t word, lock;
|
||||
|
||||
// get current state of lock
|
||||
word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
|
||||
|
||||
#if PTHREAD_USE_NSYNC
|
||||
// delegate to *NSYNC if possible
|
||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&
|
||||
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
||||
_weaken(nsync_mu_trylock)) {
|
||||
if (_weaken(nsync_mu_trylock)((nsync_mu *)mutex)) {
|
||||
return 0;
|
||||
} else {
|
||||
return EBUSY;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// handle normal mutexes
|
||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
|
||||
static errno_t pthread_mutex_trylock_naive(pthread_mutex_t *mutex,
|
||||
uint64_t word) {
|
||||
uint64_t lock;
|
||||
word = MUTEX_UNLOCK(word);
|
||||
lock = MUTEX_LOCK(word);
|
||||
if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
|
||||
|
@ -68,10 +37,20 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
|
|||
memory_order_relaxed))
|
||||
return 0;
|
||||
return EBUSY;
|
||||
}
|
||||
}
|
||||
|
||||
// handle recursive and error check mutexes
|
||||
me = gettid();
|
||||
/**
 * Makes a single non-blocking attempt to take a futex-backed mutex.
 *
 * One strong compare-and-swap tries to move the word from 0 to 1 with
 * acquire ordering. No retry and no futex wait is ever performed.
 *
 * @param futex is the 32-bit lock word
 * @return 0 if the word was swapped from 0 to 1, otherwise `EBUSY`
 */
static errno_t pthread_mutex_trylock_drepper(atomic_int *futex) {
  int expected = 0;
  int acquired = atomic_compare_exchange_strong_explicit(
      futex, &expected, 1, memory_order_acquire, memory_order_acquire);
  return acquired ? 0 : EBUSY;
}
|
||||
|
||||
static errno_t pthread_mutex_trylock_recursive(pthread_mutex_t *mutex,
|
||||
uint64_t word) {
|
||||
uint64_t lock;
|
||||
int me = gettid();
|
||||
for (;;) {
|
||||
if (MUTEX_OWNER(word) == me) {
|
||||
if (MUTEX_TYPE(word) != PTHREAD_MUTEX_ERRORCHECK) {
|
||||
|
@ -100,3 +79,47 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
|
|||
return EBUSY;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts acquiring lock.
|
||||
*
|
||||
* Unlike pthread_mutex_lock() this function won't block and instead
|
||||
* returns an error immediately if the lock couldn't be acquired.
|
||||
*
|
||||
* @return 0 if lock was acquired, otherwise an errno
|
||||
* @raise EAGAIN if maximum number of recursive locks is held
|
||||
* @raise EBUSY if lock is currently held in read or write mode
|
||||
* @raise EINVAL if `mutex` doesn't refer to an initialized lock
|
||||
* @raise EDEADLK if `mutex` is `PTHREAD_MUTEX_ERRORCHECK` and the
|
||||
* current thread already holds this mutex
|
||||
*/
|
||||
errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
|
||||
|
||||
// get current state of lock
|
||||
uint64_t word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
|
||||
|
||||
#if PTHREAD_USE_NSYNC
|
||||
// use superior mutexes if possible
|
||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&
|
||||
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
||||
_weaken(nsync_mu_trylock)) {
|
||||
if (_weaken(nsync_mu_trylock)((nsync_mu *)mutex)) {
|
||||
return 0;
|
||||
} else {
|
||||
return EBUSY;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// handle normal mutexes
|
||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
|
||||
if (_weaken(nsync_futex_wait_)) {
|
||||
return pthread_mutex_trylock_drepper(&mutex->_futex);
|
||||
} else {
|
||||
return pthread_mutex_trylock_naive(mutex, word);
|
||||
}
|
||||
}
|
||||
|
||||
// handle recursive and error checking mutexes
|
||||
return pthread_mutex_trylock_recursive(mutex, word);
|
||||
}
|
||||
|
|
|
@ -25,45 +25,26 @@
|
|||
#include "libc/runtime/internal.h"
|
||||
#include "libc/thread/lock.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "third_party/nsync/futex.internal.h"
|
||||
#include "third_party/nsync/mu.h"
|
||||
|
||||
/**
|
||||
* Releases mutex.
|
||||
*
|
||||
* This function does nothing in vfork() children.
|
||||
*
|
||||
* @return 0 on success or error number on failure
|
||||
* @raises EPERM if in error check mode and not owned by caller
|
||||
* @vforksafe
|
||||
*/
|
||||
errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
|
||||
int me;
|
||||
uint64_t word, lock;
|
||||
|
||||
LOCKTRACE("pthread_mutex_unlock(%t)", mutex);
|
||||
|
||||
// get current state of lock
|
||||
word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
|
||||
|
||||
#if PTHREAD_USE_NSYNC
|
||||
// use fancy nsync mutex if possible
|
||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL && //
|
||||
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
||||
_weaken(nsync_mu_unlock)) {
|
||||
_weaken(nsync_mu_unlock)((nsync_mu *)mutex);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// implement barebones normal mutexes
|
||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
|
||||
lock = MUTEX_UNLOCK(word);
|
||||
static void pthread_mutex_unlock_naive(pthread_mutex_t *mutex, uint64_t word) {
|
||||
uint64_t lock = MUTEX_UNLOCK(word);
|
||||
atomic_store_explicit(&mutex->_word, lock, memory_order_release);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// implement recursive mutex unlocking
|
||||
me = gettid();
|
||||
// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
|
||||
static void pthread_mutex_unlock_drepper(atomic_int *futex, char pshare) {
|
||||
int word = atomic_fetch_sub_explicit(futex, 1, memory_order_release);
|
||||
if (word == 2) {
|
||||
atomic_store_explicit(futex, 0, memory_order_release);
|
||||
_weaken(nsync_futex_wake_)(futex, 1, pshare);
|
||||
}
|
||||
}
|
||||
|
||||
static errno_t pthread_mutex_unlock_recursive(pthread_mutex_t *mutex,
|
||||
uint64_t word) {
|
||||
int me = gettid();
|
||||
for (;;) {
|
||||
|
||||
// we allow unlocking an initialized lock that wasn't locked, but we
|
||||
|
@ -88,3 +69,44 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
|
|||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Releases mutex.
|
||||
*
|
||||
* This function does nothing in vfork() children.
|
||||
*
|
||||
* @return 0 on success or error number on failure
|
||||
* @raises EPERM if in error check mode and not owned by caller
|
||||
* @vforksafe
|
||||
*/
|
||||
errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
|
||||
uint64_t word;
|
||||
|
||||
LOCKTRACE("pthread_mutex_unlock(%t)", mutex);
|
||||
|
||||
// get current state of lock
|
||||
word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
|
||||
|
||||
#if PTHREAD_USE_NSYNC
|
||||
// use superior mutexes if possible
|
||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL && //
|
||||
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
||||
_weaken(nsync_mu_unlock)) {
|
||||
_weaken(nsync_mu_unlock)((nsync_mu *)mutex);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// implement barebones normal mutexes
|
||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
|
||||
if (_weaken(nsync_futex_wake_)) {
|
||||
pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
|
||||
} else {
|
||||
pthread_mutex_unlock_naive(mutex, word);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// handle recursive and error checking mutexes
|
||||
return pthread_mutex_unlock_recursive(mutex, word);
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ void sys_sched_yield(void);
|
|||
int pthread_yield_np(void) {
|
||||
if (IsXnuSilicon()) {
|
||||
__syslib->__pthread_yield_np();
|
||||
} else if (IsOpenbsd() || IsNetbsd()) {
|
||||
} else if (IsOpenbsd()) {
|
||||
// sched_yield() is punishingly slow on OpenBSD
|
||||
// it's ruinously slow it'll destroy everything
|
||||
pthread_pause_np();
|
||||
|
|
|
@ -16,9 +16,10 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/atomic.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "third_party/nsync/counter.h"
|
||||
|
||||
/**
|
||||
* Destroys barrier.
|
||||
|
@ -27,9 +28,8 @@
|
|||
* @raise EINVAL if threads are still inside the barrier
|
||||
*/
|
||||
errno_t pthread_barrier_destroy(pthread_barrier_t *barrier) {
|
||||
if (barrier->_nsync) {
|
||||
nsync_counter_free(barrier->_nsync);
|
||||
barrier->_nsync = 0;
|
||||
}
|
||||
if (atomic_load_explicit(&barrier->_waiters, memory_order_relaxed))
|
||||
return EINVAL;
|
||||
memset(barrier, -1, sizeof(*barrier));
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -17,8 +17,9 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/atomic.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "third_party/nsync/counter.h"
|
||||
|
||||
/**
|
||||
* Initializes barrier.
|
||||
|
@ -28,16 +29,17 @@
|
|||
* before the barrier is released, which must be greater than zero
|
||||
* @return 0 on success, or error number on failure
|
||||
* @raise EINVAL if `count` isn't greater than zero
|
||||
* @raise ENOMEM if insufficient memory exists
|
||||
*/
|
||||
errno_t pthread_barrier_init(pthread_barrier_t *barrier,
|
||||
const pthread_barrierattr_t *attr,
|
||||
unsigned count) {
|
||||
nsync_counter c;
|
||||
if (!count)
|
||||
return EINVAL;
|
||||
if (!(c = nsync_counter_new(count)))
|
||||
return ENOMEM;
|
||||
*barrier = (pthread_barrier_t){._nsync = c};
|
||||
if (count > INT_MAX)
|
||||
return EINVAL;
|
||||
barrier->_count = count;
|
||||
barrier->_pshared = attr ? *attr : PTHREAD_PROCESS_PRIVATE;
|
||||
atomic_store_explicit(&barrier->_counter, count, memory_order_relaxed);
|
||||
atomic_store_explicit(&barrier->_waiters, 0, memory_order_relaxed);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -16,25 +16,53 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/calls/blockcancel.internal.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/atomic.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "third_party/nsync/counter.h"
|
||||
#include "third_party/nsync/futex.internal.h"
|
||||
|
||||
/**
|
||||
* Waits for all threads to arrive at barrier.
|
||||
*
|
||||
* When the barrier is broken, the state becomes reset to what it was
|
||||
* when pthread_barrier_init() was called, so that the barrier may be
|
||||
* used again in the same way. The last thread to arrive shall be the
|
||||
* last to leave and it returns a magic value.
|
||||
* used again in the same way.
|
||||
*
|
||||
* Unlike pthread_cond_timedwait() this function is not a cancelation
|
||||
* point. It is not needed to have cleanup handlers on block cancels.
|
||||
*
|
||||
* @return 0 on success, `PTHREAD_BARRIER_SERIAL_THREAD` to one lucky
|
||||
* thread which was the last arrival, or an errno on error
|
||||
* @raise EINVAL if barrier is used incorrectly
|
||||
*/
|
||||
errno_t pthread_barrier_wait(pthread_barrier_t *barrier) {
|
||||
if (nsync_counter_add(barrier->_nsync, -1)) {
|
||||
nsync_counter_wait(barrier->_nsync, nsync_time_no_deadline);
|
||||
return 0;
|
||||
} else {
|
||||
int n;
|
||||
|
||||
// enter barrier
|
||||
atomic_fetch_add_explicit(&barrier->_waiters, 1, memory_order_acq_rel);
|
||||
n = atomic_fetch_sub_explicit(&barrier->_counter, 1, memory_order_acq_rel);
|
||||
n = n - 1;
|
||||
|
||||
// this can only happen on invalid usage
|
||||
if (n < 0)
|
||||
return EINVAL;
|
||||
|
||||
// reset count and wake waiters if we're last at barrier
|
||||
if (!n) {
|
||||
atomic_store_explicit(&barrier->_counter, barrier->_count,
|
||||
memory_order_release);
|
||||
atomic_store_explicit(&barrier->_waiters, 0, memory_order_release);
|
||||
nsync_futex_wake_(&barrier->_waiters, INT_MAX, barrier->_pshared);
|
||||
return PTHREAD_BARRIER_SERIAL_THREAD;
|
||||
}
|
||||
|
||||
// wait for everyone else to arrive at barrier
|
||||
BLOCK_CANCELATION;
|
||||
while ((n = atomic_load_explicit(&barrier->_waiters, memory_order_acquire)))
|
||||
nsync_futex_wait_(&barrier->_waiters, n, barrier->_pshared, 0);
|
||||
ALLOW_CANCELATION;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
*
|
||||
* @param pshared is set to one of the following
|
||||
* - `PTHREAD_PROCESS_PRIVATE` (default)
|
||||
* - `PTHREAD_PROCESS_SHARED` (unsupported)
|
||||
* - `PTHREAD_PROCESS_SHARED`
|
||||
* @return 0 on success, or error on failure
|
||||
*/
|
||||
errno_t pthread_barrierattr_getpshared(const pthread_barrierattr_t *attr,
|
||||
|
|
|
@ -24,6 +24,6 @@
|
|||
* @return 0 on success, or error on failure
|
||||
*/
|
||||
errno_t pthread_barrierattr_init(pthread_barrierattr_t *attr) {
|
||||
*attr = 0;
|
||||
*attr = PTHREAD_PROCESS_PRIVATE;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -24,13 +24,14 @@
|
|||
*
|
||||
* @param pshared can be one of
|
||||
* - `PTHREAD_PROCESS_PRIVATE` (default)
|
||||
* - `PTHREAD_PROCESS_SHARED` (unsupported)
|
||||
* - `PTHREAD_PROCESS_SHARED`
|
||||
* @return 0 on success, or error on failure
|
||||
* @raises EINVAL if `pshared` is invalid
|
||||
*/
|
||||
errno_t pthread_barrierattr_setpshared(pthread_barrierattr_t *attr,
|
||||
int pshared) {
|
||||
switch (pshared) {
|
||||
case PTHREAD_PROCESS_SHARED:
|
||||
case PTHREAD_PROCESS_PRIVATE:
|
||||
*attr = pshared;
|
||||
return 0;
|
||||
|
|
|
@ -46,7 +46,7 @@ COSMOPOLITAN_C_START_
|
|||
#define PTHREAD_RWLOCK_INITIALIZER {0}
|
||||
#define PTHREAD_MUTEX_INITIALIZER {0}
|
||||
|
||||
#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, 0, PTHREAD_MUTEX_RECURSIVE}
|
||||
// static initializer for a recursive mutex; the three positions line up
// with the members of pthread_mutex_t: `_nsync`, then the anonymous
// union holding `_pid` / `_futex`, then `_word` which carries the type.
// the union is spelled `{0}` rather than `{}` because empty initializer
// braces are only standard as of C23, and this may be expanded by code
// built as plain C11 with pedantic warnings enabled
#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, {0}, PTHREAD_MUTEX_RECURSIVE}
|
||||
|
||||
typedef uintptr_t pthread_t;
|
||||
typedef int pthread_id_np_t;
|
||||
|
@ -66,7 +66,10 @@ typedef struct pthread_spinlock_s {
|
|||
|
||||
typedef struct pthread_mutex_s {
|
||||
uint32_t _nsync;
|
||||
union {
|
||||
int32_t _pid;
|
||||
_Atomic(int32_t) _futex;
|
||||
};
|
||||
_Atomic(uint64_t) _word;
|
||||
} pthread_mutex_t;
|
||||
|
||||
|
@ -92,7 +95,10 @@ typedef struct pthread_rwlock_s {
|
|||
} pthread_rwlock_t;
|
||||
|
||||
typedef struct pthread_barrier_s {
|
||||
void *_nsync;
|
||||
int _count;
|
||||
char _pshared;
|
||||
_Atomic(int) _counter;
|
||||
_Atomic(int) _waiters;
|
||||
} pthread_barrier_t;
|
||||
|
||||
typedef struct pthread_attr_s {
|
||||
|
|
236
test/libc/thread/footek_test.c
Normal file
236
test/libc/thread/footek_test.c
Normal file
|
@ -0,0 +1,236 @@
|
|||
#include <assert.h>
|
||||
#include <cosmo.h>
|
||||
#include <linux/futex.h>
|
||||
#include <pthread.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include "third_party/nsync/futex.internal.h"
|
||||
|
||||
// THIS IS AN EXAMPLE OF HOW TO USE COSMOPOLITAN FUTEXES TO IMPLEMENT
|
||||
// YOUR OWN MUTEXES FROM SCRATCH. LOOK AT HOW MUCH BETTER THIS CAN
|
||||
// MAKE THINGS COMPARED TO SPIN LOCKS. ALGORITHM FROM ULRICH DREPPER.
|
||||
|
||||
// arm fleet
|
||||
// with futexes
|
||||
// 30 threads / 100000 iterations
|
||||
//
|
||||
// 242,604 us real
|
||||
// 4,222,946 us user
|
||||
// 1,079,229 us sys
|
||||
// footek_test on studio.test. 630 µs 17'415 µs 256'782 µs
|
||||
// 1,362,557 us real
|
||||
// 3,232,978 us user
|
||||
// 2,104,824 us sys
|
||||
// footek_test on pi.test. 611 µs 21'708 µs 1'385'129 µs
|
||||
// 1,346,482 us real
|
||||
// 3,370,513 us user
|
||||
// 1,992,383 us sys
|
||||
// footek_test on freebsdarm.test. 427 µs 19'967 µs 1'393'476 µs
|
||||
|
||||
// arm fleet
|
||||
// without futexes
|
||||
// 30 threads / 100000 iterations
|
||||
//
|
||||
// 1,282,084 us real
|
||||
// 29,359,582 us user
|
||||
// 34,553 us sys
|
||||
// footek_test on studio.test. 961 µs 12'907 µs 1'287'983 µs
|
||||
// 4,070,988 us real
|
||||
// 16,203,990 us user
|
||||
// 7,999 us sys
|
||||
// footek_test on pi.test. 459 µs 16'376 µs 4'095'512 µs
|
||||
// 7,012,493 us real
|
||||
// 27,936,725 us user
|
||||
// 7,871 us sys
|
||||
// footek_test on freebsdarm.test. 502 µs 16'446 µs 7'051'545 µs
|
||||
|
||||
// x86 fleet
|
||||
// with futexes
|
||||
// 30 threads / 100000 iterations
|
||||
//
|
||||
// 146,015 us real
|
||||
// 169,427 us user
|
||||
// 68,939 us sys
|
||||
// footek_test on rhel7.test. 376 µs 2'259 µs 153'024 µs
|
||||
// 144,917 us real
|
||||
// 383,317 us user
|
||||
// 191,203 us sys
|
||||
// footek_test on xnu.test. 11'143 µs 9'159 µs 164'865 µs
|
||||
// 244,286 us real
|
||||
// 405,395 us user
|
||||
// 956,122 us sys
|
||||
// footek_test on freebsd.test. 394 µs 2'165 µs 256'227 µs
|
||||
// 209,095 us real
|
||||
// 616,634 us user
|
||||
// 9,945 us sys
|
||||
// footek_test on netbsd.test. 502 µs 2'020 µs 261'895 µs
|
||||
// 344,876 us real
|
||||
// 50,000 us user
|
||||
// 1,240,000 us sys
|
||||
// footek_test on openbsd.test. 457 µs 2'737 µs 396'342 µs
|
||||
// 1,193,906 us real
|
||||
// 17,546,875 us user
|
||||
// 3,000,000 us sys
|
||||
// footek_test on win10.test. 462 µs 59'528 µs 1'348'265 µs
|
||||
|
||||
// x86 fleet
|
||||
// without futexes
|
||||
// 30 threads / 100000 iterations
|
||||
//
|
||||
// 897,815 us real
|
||||
// 1,763,705 us user
|
||||
// 9,696 us sys
|
||||
// footek_test on rhel7.test. 423 µs 2'638 µs 912'241 µs
|
||||
// 790,332 us real
|
||||
// 2,359,967 us user
|
||||
// 0 us sys
|
||||
// footek_test on netbsd.test. 1'151 µs 2'634 µs 1'014'867 µs
|
||||
// 2,332,724 us real
|
||||
// 9,150,000 us user
|
||||
// 10,000 us sys
|
||||
// footek_test on openbsd.test. 557 µs 3'020 µs 2'554'648 µs
|
||||
// 2,528,863 us real
|
||||
// 56,546,875 us user
|
||||
// 1,671,875 us sys
|
||||
// footek_test on win10.test. 962 µs 9'698 µs 2'751'905 µs
|
||||
// 2,916,033 us real
|
||||
// 17,236,103 us user
|
||||
// 0 us sys
|
||||
// footek_test on freebsd.test. 690 µs 3'011 µs 2'925'997 µs
|
||||
// 4,225,726 us real
|
||||
// 16,679,456 us user
|
||||
// 16,265 us sys
|
||||
// footek_test on xnu.test. 98'468 µs 5'242 µs 5'191'724 µs
|
||||
|
||||
#define USE_FUTEX 1
|
||||
#define THREADS 30
|
||||
#define ITERATIONS 30000
|
||||
|
||||
#define MUTEX_LOCKED(word) ((word) & 8)
|
||||
#define MUTEX_WAITING(word) ((word) & 16)
|
||||
|
||||
#define MUTEX_LOCK(word) ((word) | 8)
|
||||
#define MUTEX_SET_WAITING(word) ((word) | 16)
|
||||
#define MUTEX_UNLOCK(word) ((word) & ~(8 | 16))
|
||||
|
||||
/**
 * Acquires the example mutex stored in `futex`.
 *
 * The uncontended compare-and-swap from 0 to 1 is attempted four times
 * with a pause in between. After that the word is set to 2 and the
 * thread waits on it, with cancelation disabled around the wait,
 * until the value it displaces is no longer greater than zero. When
 * `USE_FUTEX` is zero the wait is compiled out and this loop spins.
 */
void lock(atomic_int *futex) {
  int seen;
  int oldstate;
  int tries = 4;

  // optimistic phase: try grabbing an uncontended lock a few times
  do {
    seen = 0;
    if (atomic_compare_exchange_strong_explicit(
            futex, &seen, 1, memory_order_acquire, memory_order_acquire))
      return;
    pthread_pause_np();
  } while (--tries);

  // announce there's a waiter unless the word already says so
  if (seen == 1)
    seen = atomic_exchange_explicit(futex, 2, memory_order_acquire);

  // sleep until the word we displace shows the lock wasn't held
  for (; seen > 0;
       seen = atomic_exchange_explicit(futex, 2, memory_order_acquire)) {
    pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
#if USE_FUTEX
    nsync_futex_wait_(futex, 2, 0, 0);
#endif
    pthread_setcancelstate(oldstate, 0);
  }
}
|
||||
|
||||
// Releases the lock stored in *futex that was taken with lock().
//
// @param futex is the lock word shared with lock()
void unlock(atomic_int *futex) {
|
||||
// decrement and look at the value that was there before: a previous
// value of 1 becomes 0 and nothing else needs to happen
int word = atomic_fetch_sub_explicit(futex, 1, memory_order_release);
|
||||
if (word == 2) {
|
||||
// previous value 2 was decremented to 1, so the word is explicitly
// reset to 0 before waking anyone
atomic_store_explicit(futex, 0, memory_order_release);
|
||||
#if USE_FUTEX
|
||||
// presumably wakes at most one thread blocked in lock() -- confirm
// the argument meaning against the nsync_futex_wake_() declaration
nsync_futex_wake_(futex, 1, 0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Shared counter: each worker() increments it while holding g_lock, and
// main() asserts it ends up equal to THREADS * ITERATIONS.
int g_chores;
|
||||
// Lock word handed to lock()/unlock(); static storage starts it at 0,
// which is the value lock() expects for a free lock.
atomic_int g_lock;
|
||||
// NOTE(review): not referenced by any code visible in this part of the
// file; presumably used by the pthread_mutex_t variant of the benchmark
// -- confirm.
pthread_mutex_t g_locker;
|
||||
|
||||
void *worker(void *arg) {
|
||||
for (int i = 0; i < ITERATIONS; ++i) {
|
||||
lock(&g_lock);
|
||||
++g_chores;
|
||||
unlock(&g_lock);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main() {
|
||||
struct timeval start;
|
||||
gettimeofday(&start, 0);
|
||||
|
||||
pthread_t th[THREADS];
|
||||
for (int i = 0; i < THREADS; ++i)
|
||||
pthread_create(&th[i], 0, worker, 0);
|
||||
for (int i = 0; i < THREADS; ++i)
|
||||
pthread_join(th[i], 0);
|
||||
npassert(g_chores == THREADS * ITERATIONS);
|
||||
|
||||
struct rusage ru;
|
||||
struct timeval end;
|
||||
gettimeofday(&end, 0);
|
||||
getrusage(RUSAGE_SELF, &ru);
|
||||
printf("%,16ld us real\n"
|
||||
"%,16ld us user\n"
|
||||
"%,16ld us sys\n",
|
||||
timeval_tomicros(timeval_sub(end, start)), //
|
||||
timeval_tomicros(ru.ru_utime), //
|
||||
timeval_tomicros(ru.ru_stime));
|
||||
|
||||
CheckForMemoryLeaks();
|
||||
}
|
||||
|
||||
// COMPARE ULRICH DREPPER'S LOCKING ALGORITHM WITH MIKE BURROWS *NSYNC
|
||||
// WHICH IS WHAT COSMOPOLITAN LIBC USES FOR YOUR POSIX THREADS MUTEXES
|
||||
|
||||
// x86 fleet
|
||||
// with pthread_mutex_t
|
||||
// 30 threads / 100000 iterations
|
||||
//
|
||||
// 186,976 us real
|
||||
// 43,609 us user
|
||||
// 205,585 us sys
|
||||
// footek_test on freebsd.test. 410 µs 2'054 µs 195'339 µs
|
||||
// 238,902 us real
|
||||
// 235,743 us user
|
||||
// 97,881 us sys
|
||||
// footek_test on rhel7.test. 343 µs 2'339 µs 246'926 µs
|
||||
// 201,285 us real
|
||||
// 249,612 us user
|
||||
// 141,230 us sys
|
||||
// footek_test on xnu.test. 1'960 µs 5'350 µs 265'758 µs
|
||||
// 303,363 us real
|
||||
// 60,000 us user
|
||||
// 410,000 us sys
|
||||
// footek_test on openbsd.test. 545 µs 3'023 µs 326'200 µs
|
||||
// 386,085 us real
|
||||
// 586,455 us user
|
||||
// 466,991 us sys
|
||||
// footek_test on netbsd.test. 344 µs 2'421 µs 413'440 µs
|
||||
// 245,010 us real
|
||||
// 437,500 us user
|
||||
// 140,625 us sys
|
||||
// footek_test on win10.test. 300 µs 18'574 µs 441'225 µs
|
||||
|
||||
// arm fleet
|
||||
// with pthread_mutex_t
|
||||
// 30 threads / 100000 iterations
|
||||
//
|
||||
// 87,132 us real
|
||||
// 183,517 us user
|
||||
// 20,020 us sys
|
||||
// footek_test on studio.test. 560 µs 12'418 µs 92'825 µs
|
||||
// 679,374 us real
|
||||
// 957,678 us user
|
||||
// 605,078 us sys
|
||||
// footek_test on pi.test. 462 µs 16'574 µs 702'833 µs
|
||||
// 902,343 us real
|
||||
// 1,459,706 us user
|
||||
// 781,140 us sys
|
||||
// footek_test on freebsdarm.test. 400 µs 16'261 µs 970'022 µs
|
7
third_party/nsync/common.c
vendored
7
third_party/nsync/common.c
vendored
|
@ -37,6 +37,7 @@
|
|||
#include "third_party/nsync/atomic.internal.h"
|
||||
#include "third_party/nsync/common.internal.h"
|
||||
#include "third_party/nsync/mu_semaphore.h"
|
||||
#include "third_party/nsync/mu_semaphore.internal.h"
|
||||
#include "third_party/nsync/wait_s.internal.h"
|
||||
__static_yoink("nsync_notice");
|
||||
|
||||
|
@ -147,9 +148,9 @@ static void free_waiters_push (waiter *w) {
|
|||
|
||||
static void free_waiters_populate (void) {
|
||||
int n;
|
||||
if (IsNetbsd () || IsXnuSilicon ()) {
|
||||
// netbsd needs one file descriptor per semaphore (!!)
|
||||
// tim cook wants us to use his grand central dispatch
|
||||
if (IsNetbsd () || (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ())) {
|
||||
// netbsd needs a real file descriptor per semaphore
|
||||
// tim cook wants us to use his lol central dispatch
|
||||
n = 1;
|
||||
} else {
|
||||
n = getpagesize() / sizeof(waiter);
|
||||
|
|
5
third_party/nsync/futex.c
vendored
5
third_party/nsync/futex.c
vendored
|
@ -52,6 +52,7 @@
|
|||
#include "third_party/nsync/atomic.h"
|
||||
#include "third_party/nsync/common.internal.h"
|
||||
#include "third_party/nsync/futex.internal.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "third_party/nsync/time.h"
|
||||
|
||||
#define FUTEX_WAIT_BITS_ FUTEX_BITSET_MATCH_ANY
|
||||
|
@ -138,7 +139,7 @@ static int nsync_futex_polyfill_ (atomic_int *w, int expect, struct timespec *ab
|
|||
}
|
||||
if (_weaken (pthread_testcancel_np) &&
|
||||
_weaken (pthread_testcancel_np) ()) {
|
||||
return -ETIMEDOUT;
|
||||
return -ECANCELED;
|
||||
}
|
||||
if (abstime && timespec_cmp (timespec_real (), *abstime) >= 0) {
|
||||
return -ETIMEDOUT;
|
||||
|
@ -163,7 +164,7 @@ static int nsync_futex_wait_win32_ (atomic_int *w, int expect, char pshare,
|
|||
|
||||
for (;;) {
|
||||
now = timespec_real ();
|
||||
if (timespec_cmp (now, deadline) > 0) {
|
||||
if (timespec_cmp (now, deadline) >= 0) {
|
||||
return etimedout();
|
||||
}
|
||||
wait = timespec_sub (deadline, now);
|
||||
|
|
13
third_party/nsync/mu_semaphore.c
vendored
13
third_party/nsync/mu_semaphore.c
vendored
|
@ -21,14 +21,9 @@
|
|||
#include "third_party/nsync/mu_semaphore.internal.h"
|
||||
__static_yoink("nsync_notice");
|
||||
|
||||
/* Apple's ulock (part by Cosmo futexes) is an internal API, but:
|
||||
1. Unlike GCD it's cancellable, i.e. can be EINTR'd by signals
|
||||
2. We currently always use ulock anyway for joining threads */
|
||||
#define PREFER_GCD_OVER_ULOCK 1
|
||||
|
||||
/* Initialize *s; the initial value is 0. */
|
||||
bool nsync_mu_semaphore_init (nsync_semaphore *s) {
|
||||
if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
|
||||
if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
|
||||
return nsync_mu_semaphore_init_gcd (s);
|
||||
} else if (IsNetbsd ()) {
|
||||
return nsync_mu_semaphore_init_sem (s);
|
||||
|
@ -44,7 +39,7 @@ bool nsync_mu_semaphore_init (nsync_semaphore *s) {
|
|||
errno_t nsync_mu_semaphore_p (nsync_semaphore *s) {
|
||||
errno_t err;
|
||||
BEGIN_CANCELATION_POINT;
|
||||
if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
|
||||
if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
|
||||
err = nsync_mu_semaphore_p_gcd (s);
|
||||
} else if (IsNetbsd ()) {
|
||||
err = nsync_mu_semaphore_p_sem (s);
|
||||
|
@ -62,7 +57,7 @@ errno_t nsync_mu_semaphore_p (nsync_semaphore *s) {
|
|||
errno_t nsync_mu_semaphore_p_with_deadline (nsync_semaphore *s, nsync_time abs_deadline) {
|
||||
errno_t err;
|
||||
BEGIN_CANCELATION_POINT;
|
||||
if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
|
||||
if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
|
||||
err = nsync_mu_semaphore_p_with_deadline_gcd (s, abs_deadline);
|
||||
} else if (IsNetbsd ()) {
|
||||
err = nsync_mu_semaphore_p_with_deadline_sem (s, abs_deadline);
|
||||
|
@ -75,7 +70,7 @@ errno_t nsync_mu_semaphore_p_with_deadline (nsync_semaphore *s, nsync_time abs_d
|
|||
|
||||
/* Ensure that the count of *s is at least 1. */
|
||||
void nsync_mu_semaphore_v (nsync_semaphore *s) {
|
||||
if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
|
||||
if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
|
||||
return nsync_mu_semaphore_v_gcd (s);
|
||||
} else if (IsNetbsd ()) {
|
||||
return nsync_mu_semaphore_v_sem (s);
|
||||
|
|
14
third_party/nsync/mu_semaphore.internal.h
vendored
14
third_party/nsync/mu_semaphore.internal.h
vendored
|
@ -4,6 +4,20 @@
|
|||
#include "third_party/nsync/time.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* XNU ulock (used by cosmo futexes) is an internal API, however:
|
||||
|
||||
1. Unlike GCD it's cancelable i.e. can be EINTR'd by signals
|
||||
2. We have no choice but to use ulock for joining threads
|
||||
3. Grand Central Dispatch requires a busy loop workaround
|
||||
4. ulock makes our mutexes use 20% more system time (meh)
|
||||
5. ulock makes our mutexes use 40% less wall time (good)
|
||||
6. ulock makes our mutexes use 64% less user time (woop)
|
||||
|
||||
ulock is an outstanding system call that must be used.
|
||||
gcd is not an acceptable alternative to ulock. */
|
||||
|
||||
#define NSYNC_USE_GRAND_CENTRAL 0
|
||||
|
||||
bool nsync_mu_semaphore_init_futex(nsync_semaphore *);
|
||||
errno_t nsync_mu_semaphore_p_futex(nsync_semaphore *);
|
||||
errno_t nsync_mu_semaphore_p_with_deadline_futex(nsync_semaphore *, nsync_time);
|
||||
|
|
Loading…
Reference in a new issue