Mirror of https://github.com/jart/cosmopolitan.git (synced 2025-01-31)
Make more improvements to threads and mappings
- NetBSD should now have faster synchronization
- POSIX barriers may now be shared across processes
- An edge case with memory map tracking has been fixed
- Grand Central Dispatch is no longer used on MacOS ARM64
- POSIX mutexes in normal mode now use futexes across processes
This commit is contained in: parent 2187d6d2dd, commit e398f3887c
20 changed files with 566 additions and 171 deletions
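For readers skimming the diffs below: the normal-mode mutex fast path now follows Drepper's "take 3" three-state futex protocol (0 = unlocked, 1 = locked, 2 = contended). A minimal sketch of that protocol, assembled from the code this commit adds (the nsync_futex_wait_/nsync_futex_wake_ calls and orderings are taken verbatim from the hunks below; the sketch_ function names are illustrative only):

    #include <stdatomic.h>
    #include "third_party/nsync/futex.internal.h"

    // drepper "take 3": 0 = unlocked, 1 = locked, 2 = contended
    static void sketch_lock(atomic_int *futex, char pshare) {
      int word = 0;
      if (atomic_compare_exchange_strong_explicit(  // uncontended fast path
              futex, &word, 1, memory_order_acquire, memory_order_acquire))
        return;
      if (word == 1)  // mark contended before sleeping
        word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
      while (word > 0) {
        nsync_futex_wait_(futex, 2, pshare, 0);  // sleep while word == 2
        word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
      }
    }

    static void sketch_unlock(atomic_int *futex, char pshare) {
      // 2 -> 1 means a waiter may be sleeping; reset and wake one
      if (atomic_fetch_sub_explicit(futex, 1, memory_order_release) == 2) {
        atomic_store_explicit(futex, 0, memory_order_release);
        nsync_futex_wake_(futex, 1, pshare);
      }
    }

Because the futex word lives inside the mutex itself and the pshared flag is honored, this works across processes, which is what the last commit bullet refers to.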
@@ -26,11 +26,9 @@
 int begin_cancelation_point(void) {
   int state = 0;
-  struct CosmoTib *tib;
-  struct PosixThread *pt;
   if (__tls_enabled) {
-    tib = __get_tls();
-    if ((pt = (struct PosixThread *)tib->tib_pthread)) {
+    struct PosixThread *pt;
+    if ((pt = _pthread_self())) {
       state = pt->pt_flags & PT_INCANCEL;
       pt->pt_flags |= PT_INCANCEL;
     }

@@ -39,11 +37,9 @@ int begin_cancelation_point(void) {
 }

 void end_cancelation_point(int state) {
-  struct CosmoTib *tib;
-  struct PosixThread *pt;
   if (__tls_enabled) {
-    tib = __get_tls();
-    if ((pt = (struct PosixThread *)tib->tib_pthread)) {
+    struct PosixThread *pt;
+    if ((pt = _pthread_self())) {
       pt->pt_flags &= ~PT_INCANCEL;
       pt->pt_flags |= state;
     }
@@ -6,6 +6,8 @@
 #include "libc/thread/tls2.internal.h"
 COSMOPOLITAN_C_START_

+#define MAPS_RETRY ((void *)-1)
+
 #define MAP_TREE_CONTAINER(e) TREE_CONTAINER(struct Map, tree, e)

 struct Map {
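The new MAPS_RETRY sentinel changes the __maps_alloc() contract: NULL still means out of memory, while MAPS_RETRY means the allocator just mapped and inserted a fresh pool of Map objects (dropping the maps lock in the process), so the caller must redo its lookup, hence the StartOver: labels added in the hunks below. A minimal sketch of the caller-side loop, mirroring the do/while this commit adds to __mmap_chunk:

    // sketch: retry allocation until we get a real Map object
    struct Map *map;
    do {
      if (!(map = __maps_alloc()))  // NULL means ENOMEM
        return MAP_FAILED;
    } while (map == MAPS_RETRY);    // pool grew; look up again and retry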
@@ -120,6 +120,7 @@ static int __muntrack(char *addr, size_t size, int pagesz,
   struct Map *map;
   struct Map *next;
   struct Map *floor;
+StartOver:
   floor = __maps_floor(addr);
   for (map = floor; map && map->addr <= addr + size; map = next) {
     next = __maps_next(map);

@@ -148,6 +149,8 @@ static int __muntrack(char *addr, size_t size, int pagesz,
       ASSERT(left > 0);
       struct Map *leftmap;
       if ((leftmap = __maps_alloc())) {
+        if (leftmap == MAPS_RETRY)
+          goto StartOver;
         map->addr += left;
         map->size = right;
         if (!(map->flags & MAP_ANONYMOUS))

@@ -167,6 +170,8 @@ static int __muntrack(char *addr, size_t size, int pagesz,
       size_t right = map_addr + map_size - addr;
       struct Map *rightmap;
       if ((rightmap = __maps_alloc())) {
+        if (rightmap == MAPS_RETRY)
+          goto StartOver;
         map->size = left;
         __maps.pages -= (right + pagesz - 1) / pagesz;
         rightmap->addr = addr;

@@ -184,8 +189,14 @@ static int __muntrack(char *addr, size_t size, int pagesz,
       size_t right = map_size - middle - left;
       struct Map *leftmap;
       if ((leftmap = __maps_alloc())) {
+        if (leftmap == MAPS_RETRY)
+          goto StartOver;
         struct Map *middlemap;
         if ((middlemap = __maps_alloc())) {
+          if (middlemap == MAPS_RETRY) {
+            __maps_free(leftmap);
+            goto StartOver;
+          }
           leftmap->addr = map_addr;
           leftmap->size = left;
           leftmap->off = map->off;

@@ -204,6 +215,7 @@ static int __muntrack(char *addr, size_t size, int pagesz,
           *deleted = middlemap;
           __maps_check();
         } else {
+          __maps_free(leftmap);
           rc = -1;
         }
       } else {

@@ -304,12 +316,11 @@ struct Map *__maps_alloc(void) {
   map->flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK;
   map->hand = sys.maphandle;
   __maps_lock();
-  __maps_insert(map++);
+  __maps_insert(map);
   __maps_unlock();
-  map->addr = MAP_FAILED;
-  for (int i = 1; i < gransz / sizeof(struct Map) - 1; ++i)
+  for (int i = 1; i < gransz / sizeof(struct Map); ++i)
     __maps_free(map + i);
-  return map;
+  return MAPS_RETRY;
 }

 static int __munmap(char *addr, size_t size) {

@@ -396,21 +407,32 @@ void *__maps_pickaddr(size_t size) {
 static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
                           int64_t off, int pagesz, int gransz) {

+  // allocate Map object
+  struct Map *map;
+  do {
+    if (!(map = __maps_alloc()))
+      return MAP_FAILED;
+  } while (map == MAPS_RETRY);
+
   // polyfill nuances of fixed mappings
   int sysflags = flags;
   bool noreplace = false;
   bool should_untrack = false;
   if (flags & MAP_FIXED_NOREPLACE) {
-    if (flags & MAP_FIXED)
+    if (flags & MAP_FIXED) {
+      __maps_free(map);
       return (void *)einval();
+    }
     sysflags &= ~MAP_FIXED_NOREPLACE;
     if (IsLinux()) {
       noreplace = true;
       sysflags |= MAP_FIXED_NOREPLACE_linux;
     } else if (IsFreebsd() || IsNetbsd()) {
       sysflags |= MAP_FIXED;
-      if (__maps_overlaps(addr, size, pagesz))
+      if (__maps_overlaps(addr, size, pagesz)) {
+        __maps_free(map);
         return (void *)eexist();
+      }
     } else {
       noreplace = true;
     }

@@ -418,11 +440,6 @@ static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
     should_untrack = true;
   }

-  // allocate Map object
-  struct Map *map;
-  if (!(map = __maps_alloc()))
-    return MAP_FAILED;
-
   // remove mapping we blew away
   if (IsWindows() && should_untrack)
     __munmap(addr, size);

@@ -572,23 +589,27 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
     return (void *)einval();
   }

+  // allocate object for tracking new mapping
+  struct Map *map;
+  do {
+    if (!(map = __maps_alloc()))
+      return (void *)enomem();
+  } while (map == MAPS_RETRY);
+
   // check old interval is fully contained within one mapping
   struct Map *old_map;
   if (!(old_map = __maps_floor(old_addr)) ||
       old_addr + old_size > old_map->addr + PGUP(old_map->size) ||
-      old_addr < old_map->addr)
+      old_addr < old_map->addr) {
+    __maps_free(map);
     return (void *)efault();
+  }

   // save old properties
   int old_off = old_map->off;
   int old_prot = old_map->prot;
   int old_flags = old_map->flags;

-  // allocate object for tracking new mapping
-  struct Map *map;
-  if (!(map = __maps_alloc()))
-    return (void *)enomem();
-
   // netbsd mremap fixed returns enoent rather than unmapping old pages
   if (IsNetbsd() && (flags & MREMAP_FIXED))
     if (__munmap(new_addr, new_size)) {
@@ -75,6 +75,7 @@ int __mprotect(char *addr, size_t size, int prot) {
     return edeadlk();
   }
   struct Map *map, *floor;
+StartOver:
   floor = __maps_floor(addr);
   for (map = floor; map && map->addr <= addr + size; map = __maps_next(map)) {
     char *map_addr = map->addr;

@@ -93,10 +94,12 @@ int __mprotect(char *addr, size_t size, int prot) {
       }
     } else if (addr <= map_addr) {
       // change lefthand side of mapping
-      size_t left = PGUP(addr + size - map_addr);
+      size_t left = addr + size - map_addr;
       size_t right = map_size - left;
       struct Map *leftmap;
       if ((leftmap = __maps_alloc())) {
+        if (leftmap == MAPS_RETRY)
+          goto StartOver;
         if (!__mprotect_chunk(map_addr, left, prot, false)) {
           leftmap->addr = map_addr;
           leftmap->size = left;

@@ -127,6 +130,8 @@ int __mprotect(char *addr, size_t size, int prot) {
       size_t right = map_addr + map_size - addr;
       struct Map *leftmap;
       if ((leftmap = __maps_alloc())) {
+        if (leftmap == MAPS_RETRY)
+          goto StartOver;
         if (!__mprotect_chunk(map_addr + left, right, prot, false)) {
           leftmap->addr = map_addr;
           leftmap->size = left;

@@ -159,8 +164,14 @@ int __mprotect(char *addr, size_t size, int prot) {
       size_t right = map_size - middle - left;
       struct Map *leftmap;
       if ((leftmap = __maps_alloc())) {
+        if (leftmap == MAPS_RETRY)
+          goto StartOver;
         struct Map *midlmap;
         if ((midlmap = __maps_alloc())) {
+          if (midlmap == MAPS_RETRY) {
+            __maps_free(leftmap);
+            goto StartOver;
+          }
           if (!__mprotect_chunk(map_addr + left, middle, prot, false)) {
             leftmap->addr = map_addr;
             leftmap->size = left;
@@ -27,41 +27,47 @@
 #include "libc/runtime/internal.h"
 #include "libc/thread/lock.h"
 #include "libc/thread/thread.h"
+#include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"

-static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
-  int me;
-  int backoff = 0;
-  uint64_t word, lock;
-
-  // get current state of lock
-  word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
-
-#if PTHREAD_USE_NSYNC
-  // use fancy nsync mutex if possible
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&        //
-      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
-      _weaken(nsync_mu_lock)) {
-    _weaken(nsync_mu_lock)((nsync_mu *)mutex);
-    return 0;
-  }
-#endif
-
-  // implement barebones normal mutexes
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
-    for (;;) {
-      word = MUTEX_UNLOCK(word);
-      lock = MUTEX_LOCK(word);
-      if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
-                                                memory_order_acquire,
-                                                memory_order_relaxed))
-        return 0;
-      backoff = pthread_delay_np(mutex, backoff);
-    }
-  }
-
-  // implement recursive mutexes
-  me = gettid();
+static void pthread_mutex_lock_naive(pthread_mutex_t *mutex, uint64_t word) {
+  int backoff = 0;
+  uint64_t lock;
+  for (;;) {
+    word = MUTEX_UNLOCK(word);
+    lock = MUTEX_LOCK(word);
+    if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
+                                              memory_order_acquire,
+                                              memory_order_relaxed))
+      return;
+    backoff = pthread_delay_np(mutex, backoff);
+  }
+}
+
+// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
+// slightly improved to attempt acquiring multiple times b4 syscall
+static void pthread_mutex_lock_drepper(atomic_int *futex, char pshare) {
+  int word;
+  for (int i = 0; i < 4; ++i) {
+    word = 0;
+    if (atomic_compare_exchange_strong_explicit(
+            futex, &word, 1, memory_order_acquire, memory_order_acquire))
+      return;
+    pthread_pause_np();
+  }
+  if (word == 1)
+    word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
+  while (word > 0) {
+    _weaken(nsync_futex_wait_)(futex, 2, pshare, 0);
+    word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
+  }
+}
+
+static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
+                                            uint64_t word) {
+  uint64_t lock;
+  int backoff = 0;
+  int me = gettid();
   for (;;) {
     if (MUTEX_OWNER(word) == me) {
       if (MUTEX_TYPE(word) != PTHREAD_MUTEX_ERRORCHECK) {

@@ -91,6 +97,36 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
     }
   }
 }
+
+static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
+  uint64_t word;
+
+  // get current state of lock
+  word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
+
+#if PTHREAD_USE_NSYNC
+  // use superior mutexes if possible
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&        //
+      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
+      _weaken(nsync_mu_lock)) {
+    _weaken(nsync_mu_lock)((nsync_mu *)mutex);
+    return 0;
+  }
+#endif
+
+  // handle normal mutexes
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
+    if (_weaken(nsync_futex_wait_)) {
+      pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
+    } else {
+      pthread_mutex_lock_naive(mutex, word);
+    }
+    return 0;
+  }
+
+  // handle recursive and error checking mutexes
+  return pthread_mutex_lock_recursive(mutex, word);
+}

 /**
  * Locks mutex.
  *
@@ -24,54 +24,33 @@
 #include "libc/runtime/internal.h"
 #include "libc/thread/lock.h"
 #include "libc/thread/thread.h"
+#include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"

-/**
- * Attempts acquiring lock.
- *
- * Unlike pthread_mutex_lock() this function won't block and instead
- * returns an error immediately if the lock couldn't be acquired.
- *
- * @return 0 if lock was acquired, otherwise an errno
- * @raise EAGAIN if maximum number of recursive locks is held
- * @raise EBUSY if lock is currently held in read or write mode
- * @raise EINVAL if `mutex` doesn't refer to an initialized lock
- * @raise EDEADLK if `mutex` is `PTHREAD_MUTEX_ERRORCHECK` and the
- *     current thread already holds this mutex
- */
-errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
-  int me;
-  uint64_t word, lock;
-
-  // get current state of lock
-  word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
-
-#if PTHREAD_USE_NSYNC
-  // delegate to *NSYNC if possible
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&
-      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
-      _weaken(nsync_mu_trylock)) {
-    if (_weaken(nsync_mu_trylock)((nsync_mu *)mutex)) {
-      return 0;
-    } else {
-      return EBUSY;
-    }
-  }
-#endif
-
-  // handle normal mutexes
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
-    word = MUTEX_UNLOCK(word);
-    lock = MUTEX_LOCK(word);
-    if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
-                                              memory_order_acquire,
-                                              memory_order_relaxed))
-      return 0;
-    return EBUSY;
-  }
-
-  // handle recursive and error check mutexes
-  me = gettid();
+static errno_t pthread_mutex_trylock_naive(pthread_mutex_t *mutex,
+                                           uint64_t word) {
+  uint64_t lock;
+  word = MUTEX_UNLOCK(word);
+  lock = MUTEX_LOCK(word);
+  if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
+                                            memory_order_acquire,
+                                            memory_order_relaxed))
+    return 0;
+  return EBUSY;
+}
+
+static errno_t pthread_mutex_trylock_drepper(atomic_int *futex) {
+  int word = 0;
+  if (atomic_compare_exchange_strong_explicit(
+          futex, &word, 1, memory_order_acquire, memory_order_acquire))
+    return 0;
+  return EBUSY;
+}
+
+static errno_t pthread_mutex_trylock_recursive(pthread_mutex_t *mutex,
+                                               uint64_t word) {
+  uint64_t lock;
+  int me = gettid();
   for (;;) {
     if (MUTEX_OWNER(word) == me) {
       if (MUTEX_TYPE(word) != PTHREAD_MUTEX_ERRORCHECK) {

@@ -100,3 +79,47 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
     return EBUSY;
   }
 }
+
+/**
+ * Attempts acquiring lock.
+ *
+ * Unlike pthread_mutex_lock() this function won't block and instead
+ * returns an error immediately if the lock couldn't be acquired.
+ *
+ * @return 0 if lock was acquired, otherwise an errno
+ * @raise EAGAIN if maximum number of recursive locks is held
+ * @raise EBUSY if lock is currently held in read or write mode
+ * @raise EINVAL if `mutex` doesn't refer to an initialized lock
+ * @raise EDEADLK if `mutex` is `PTHREAD_MUTEX_ERRORCHECK` and the
+ *     current thread already holds this mutex
+ */
+errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
+
+  // get current state of lock
+  uint64_t word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
+
+#if PTHREAD_USE_NSYNC
+  // use superior mutexes if possible
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&
+      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
+      _weaken(nsync_mu_trylock)) {
+    if (_weaken(nsync_mu_trylock)((nsync_mu *)mutex)) {
+      return 0;
+    } else {
+      return EBUSY;
+    }
+  }
+#endif
+
+  // handle normal mutexes
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
+    if (_weaken(nsync_futex_wait_)) {
+      return pthread_mutex_trylock_drepper(&mutex->_futex);
+    } else {
+      return pthread_mutex_trylock_naive(mutex, word);
+    }
+  }
+
+  // handle recursive and error checking mutexes
+  return pthread_mutex_trylock_recursive(mutex, word);
+}
@@ -25,45 +25,26 @@
 #include "libc/runtime/internal.h"
 #include "libc/thread/lock.h"
 #include "libc/thread/thread.h"
+#include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"

-/**
- * Releases mutex.
- *
- * This function does nothing in vfork() children.
- *
- * @return 0 on success or error number on failure
- * @raises EPERM if in error check mode and not owned by caller
- * @vforksafe
- */
-errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
-  int me;
-  uint64_t word, lock;
-
-  LOCKTRACE("pthread_mutex_unlock(%t)", mutex);
-
-  // get current state of lock
-  word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
-
-#if PTHREAD_USE_NSYNC
-  // use fancy nsync mutex if possible
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&        //
-      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
-      _weaken(nsync_mu_unlock)) {
-    _weaken(nsync_mu_unlock)((nsync_mu *)mutex);
-    return 0;
-  }
-#endif
-
-  // implement barebones normal mutexes
-  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
-    lock = MUTEX_UNLOCK(word);
-    atomic_store_explicit(&mutex->_word, lock, memory_order_release);
-    return 0;
-  }
-
-  // implement recursive mutex unlocking
-  me = gettid();
+static void pthread_mutex_unlock_naive(pthread_mutex_t *mutex, uint64_t word) {
+  uint64_t lock = MUTEX_UNLOCK(word);
+  atomic_store_explicit(&mutex->_word, lock, memory_order_release);
+}
+
+// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
+static void pthread_mutex_unlock_drepper(atomic_int *futex, char pshare) {
+  int word = atomic_fetch_sub_explicit(futex, 1, memory_order_release);
+  if (word == 2) {
+    atomic_store_explicit(futex, 0, memory_order_release);
+    _weaken(nsync_futex_wake_)(futex, 1, pshare);
+  }
+}
+
+static errno_t pthread_mutex_unlock_recursive(pthread_mutex_t *mutex,
+                                              uint64_t word) {
+  int me = gettid();
   for (;;) {

     // we allow unlocking an initialized lock that wasn't locked, but we

@@ -88,3 +69,44 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
     return 0;
   }
 }
+
+/**
+ * Releases mutex.
+ *
+ * This function does nothing in vfork() children.
+ *
+ * @return 0 on success or error number on failure
+ * @raises EPERM if in error check mode and not owned by caller
+ * @vforksafe
+ */
+errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
+  uint64_t word;
+
+  LOCKTRACE("pthread_mutex_unlock(%t)", mutex);
+
+  // get current state of lock
+  word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
+
+#if PTHREAD_USE_NSYNC
+  // use superior mutexes if possible
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&        //
+      MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE &&  //
+      _weaken(nsync_mu_unlock)) {
+    _weaken(nsync_mu_unlock)((nsync_mu *)mutex);
+    return 0;
+  }
+#endif
+
+  // implement barebones normal mutexes
+  if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL) {
+    if (_weaken(nsync_futex_wake_)) {
+      pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
+    } else {
+      pthread_mutex_unlock_naive(mutex, word);
+    }
+    return 0;
+  }
+
+  // handle recursive and error checking mutexes
+  return pthread_mutex_unlock_recursive(mutex, word);
+}
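Since a normal-mode mutex now keeps its contended state in the `_futex` word, and the pshared flag is passed through to `nsync_futex_wait_`/`nsync_futex_wake_`, a `PTHREAD_PROCESS_SHARED` normal mutex placed in shared memory can coordinate separate processes. A hedged usage sketch using only standard POSIX calls (this example is not part of the diff):

    #include <pthread.h>
    #include <sys/mman.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void) {
      // place the mutex in anonymous shared memory so the child sees it
      pthread_mutex_t *mu =
          mmap(0, sizeof(pthread_mutex_t), PROT_READ | PROT_WRITE,
               MAP_SHARED | MAP_ANONYMOUS, -1, 0);  // error check omitted
      pthread_mutexattr_t attr;
      pthread_mutexattr_init(&attr);
      pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
      pthread_mutex_init(mu, &attr);  // normal (non-recursive) by default
      pthread_mutexattr_destroy(&attr);
      if (!fork()) {
        pthread_mutex_lock(mu);  // contended waits use the shared futex
        pthread_mutex_unlock(mu);
        _exit(0);
      }
      pthread_mutex_lock(mu);
      pthread_mutex_unlock(mu);
      wait(0);
    }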
@@ -32,7 +32,7 @@ void sys_sched_yield(void);
 int pthread_yield_np(void) {
   if (IsXnuSilicon()) {
     __syslib->__pthread_yield_np();
-  } else if (IsOpenbsd() || IsNetbsd()) {
+  } else if (IsOpenbsd()) {
     // sched_yield() is punishingly slow on OpenBSD
     // it's ruinously slow it'll destroy everything
     pthread_pause_np();
@@ -16,9 +16,10 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR            │
 │ PERFORMANCE OF THIS SOFTWARE.                                               │
 ╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/errno.h"
+#include "libc/intrin/atomic.h"
 #include "libc/str/str.h"
 #include "libc/thread/thread.h"
-#include "third_party/nsync/counter.h"

 /**
  * Destroys barrier.

@@ -27,9 +28,8 @@
  * @raise EINVAL if threads are still inside the barrier
  */
 errno_t pthread_barrier_destroy(pthread_barrier_t *barrier) {
-  if (barrier->_nsync) {
-    nsync_counter_free(barrier->_nsync);
-    barrier->_nsync = 0;
-  }
+  if (atomic_load_explicit(&barrier->_waiters, memory_order_relaxed))
+    return EINVAL;
+  memset(barrier, -1, sizeof(*barrier));
   return 0;
 }
@@ -17,8 +17,9 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                               │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/errno.h"
+#include "libc/intrin/atomic.h"
+#include "libc/limits.h"
 #include "libc/thread/thread.h"
-#include "third_party/nsync/counter.h"

 /**
  * Initializes barrier.

@@ -28,16 +29,17 @@
  *     before the barrier is released, which must be greater than zero
  * @return 0 on success, or error number on failure
  * @raise EINVAL if `count` isn't greater than zero
- * @raise ENOMEM if insufficient memory exists
  */
 errno_t pthread_barrier_init(pthread_barrier_t *barrier,
                              const pthread_barrierattr_t *attr,
                              unsigned count) {
-  nsync_counter c;
   if (!count)
     return EINVAL;
-  if (!(c = nsync_counter_new(count)))
-    return ENOMEM;
-  *barrier = (pthread_barrier_t){._nsync = c};
+  if (count > INT_MAX)
+    return EINVAL;
+  barrier->_count = count;
+  barrier->_pshared = attr ? *attr : PTHREAD_PROCESS_PRIVATE;
+  atomic_store_explicit(&barrier->_counter, count, memory_order_relaxed);
+  atomic_store_explicit(&barrier->_waiters, 0, memory_order_relaxed);
   return 0;
 }
@@ -16,25 +16,53 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR            │
 │ PERFORMANCE OF THIS SOFTWARE.                                               │
 ╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/blockcancel.internal.h"
+#include "libc/errno.h"
+#include "libc/intrin/atomic.h"
+#include "libc/limits.h"
 #include "libc/thread/thread.h"
-#include "third_party/nsync/counter.h"
+#include "third_party/nsync/futex.internal.h"

 /**
  * Waits for all threads to arrive at barrier.
  *
  * When the barrier is broken, the state becomes reset to what it was
  * when pthread_barrier_init() was called, so that the barrier may be
- * used again in the same way. The last thread to arrive shall be the
- * last to leave and it returns a magic value.
+ * used again in the same way.
+ *
+ * Unlike pthread_cond_timedwait() this function is not a cancelation
+ * point. It is not needed to have cleanup handlers on block cancels.
  *
  * @return 0 on success, `PTHREAD_BARRIER_SERIAL_THREAD` to one lucky
  *     thread which was the last arrival, or an errno on error
+ * @raise EINVAL if barrier is used incorrectly
  */
 errno_t pthread_barrier_wait(pthread_barrier_t *barrier) {
-  if (nsync_counter_add(barrier->_nsync, -1)) {
-    nsync_counter_wait(barrier->_nsync, nsync_time_no_deadline);
-    return 0;
-  } else {
+  int n;
+
+  // enter barrier
+  atomic_fetch_add_explicit(&barrier->_waiters, 1, memory_order_acq_rel);
+  n = atomic_fetch_sub_explicit(&barrier->_counter, 1, memory_order_acq_rel);
+  n = n - 1;
+
+  // this can only happen on invalid usage
+  if (n < 0)
+    return EINVAL;
+
+  // reset count and wake waiters if we're last at barrier
+  if (!n) {
+    atomic_store_explicit(&barrier->_counter, barrier->_count,
+                          memory_order_release);
+    atomic_store_explicit(&barrier->_waiters, 0, memory_order_release);
+    nsync_futex_wake_(&barrier->_waiters, INT_MAX, barrier->_pshared);
     return PTHREAD_BARRIER_SERIAL_THREAD;
   }
+
+  // wait for everyone else to arrive at barrier
+  BLOCK_CANCELATION;
+  while ((n = atomic_load_explicit(&barrier->_waiters, memory_order_acquire)))
+    nsync_futex_wait_(&barrier->_waiters, n, barrier->_pshared, 0);
+  ALLOW_CANCELATION;
+
+  return 0;
 }
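With the barrier state now stored inline (`_count`, `_pshared`, `_counter`, `_waiters`) rather than behind an nsync counter handle, a barrier initialized with `PTHREAD_PROCESS_SHARED` can likewise live in shared memory. A minimal usage sketch of the API within one process (standard POSIX calls, not part of the diff):

    #include <pthread.h>
    #include <stdio.h>

    #define N 4

    static pthread_barrier_t barrier;

    static void *worker(void *arg) {
      // all N threads rendezvous here; exactly one gets the magic value
      if (pthread_barrier_wait(&barrier) == PTHREAD_BARRIER_SERIAL_THREAD)
        puts("last arrival");
      return 0;
    }

    int main(void) {
      pthread_t th[N];
      pthread_barrier_init(&barrier, 0, N);  // null attr = process-private
      for (int i = 0; i < N; ++i)
        pthread_create(&th[i], 0, worker, 0);
      for (int i = 0; i < N; ++i)
        pthread_join(th[i], 0);
      pthread_barrier_destroy(&barrier);
    }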
@@ -23,7 +23,7 @@
  *
  * @param pshared is set to one of the following
  *     - `PTHREAD_PROCESS_PRIVATE` (default)
- *     - `PTHREAD_PROCESS_SHARED` (unsupported)
+ *     - `PTHREAD_PROCESS_SHARED`
  * @return 0 on success, or error on failure
  */
 errno_t pthread_barrierattr_getpshared(const pthread_barrierattr_t *attr,
@@ -24,6 +24,6 @@
  * @return 0 on success, or error on failure
  */
 errno_t pthread_barrierattr_init(pthread_barrierattr_t *attr) {
-  *attr = 0;
+  *attr = PTHREAD_PROCESS_PRIVATE;
   return 0;
 }
@@ -24,13 +24,14 @@
  *
  * @param pshared can be one of
  *     - `PTHREAD_PROCESS_PRIVATE` (default)
- *     - `PTHREAD_PROCESS_SHARED` (unsupported)
+ *     - `PTHREAD_PROCESS_SHARED`
  * @return 0 on success, or error on failure
  * @raises EINVAL if `pshared` is invalid
  */
 errno_t pthread_barrierattr_setpshared(pthread_barrierattr_t *attr,
                                        int pshared) {
   switch (pshared) {
+    case PTHREAD_PROCESS_SHARED:
     case PTHREAD_PROCESS_PRIVATE:
       *attr = pshared;
       return 0;
@@ -46,7 +46,7 @@ COSMOPOLITAN_C_START_
 #define PTHREAD_RWLOCK_INITIALIZER {0}
 #define PTHREAD_MUTEX_INITIALIZER {0}

-#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, 0, PTHREAD_MUTEX_RECURSIVE}
+#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, {}, PTHREAD_MUTEX_RECURSIVE}

 typedef uintptr_t pthread_t;
 typedef int pthread_id_np_t;

@@ -66,7 +66,10 @@ typedef struct pthread_spinlock_s {

 typedef struct pthread_mutex_s {
   uint32_t _nsync;
-  int32_t _pid;
+  union {
+    int32_t _pid;
+    _Atomic(int32_t) _futex;
+  };
   _Atomic(uint64_t) _word;
 } pthread_mutex_t;

@@ -92,7 +95,10 @@ typedef struct pthread_rwlock_s {
 } pthread_rwlock_t;

 typedef struct pthread_barrier_s {
-  void *_nsync;
+  int _count;
+  char _pshared;
+  _Atomic(int) _counter;
+  _Atomic(int) _waiters;
 } pthread_barrier_t;

 typedef struct pthread_attr_s {
test/libc/thread/footek_test.c (new file, 236 lines)
@@ -0,0 +1,236 @@
#include <assert.h>
#include <cosmo.h>
#include <linux/futex.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>
#include "third_party/nsync/futex.internal.h"

// THIS IS AN EXAMPLE OF HOW TO USE COSMOPOLITAN FUTEXES TO IMPLEMENT
// YOUR OWN MUTEXES FROM SCRATCH. LOOK AT HOW MUCH BETTER IT CAN
// MAKE THINGS COMPARED TO SPIN LOCKS. ALGORITHM FROM ULRICH DREPPER.

// arm fleet
// with futexes
// 30 threads / 100000 iterations
//
//       242,604 us real
//     4,222,946 us user
//     1,079,229 us sys
// footek_test on studio.test.          630 µs   17'415 µs   256'782 µs
//     1,362,557 us real
//     3,232,978 us user
//     2,104,824 us sys
// footek_test on pi.test.              611 µs   21'708 µs 1'385'129 µs
//     1,346,482 us real
//     3,370,513 us user
//     1,992,383 us sys
// footek_test on freebsdarm.test.      427 µs   19'967 µs 1'393'476 µs

// arm fleet
// without futexes
// 30 threads / 100000 iterations
//
//     1,282,084 us real
//    29,359,582 us user
//        34,553 us sys
// footek_test on studio.test.          961 µs   12'907 µs 1'287'983 µs
//     4,070,988 us real
//    16,203,990 us user
//         7,999 us sys
// footek_test on pi.test.              459 µs   16'376 µs 4'095'512 µs
//     7,012,493 us real
//    27,936,725 us user
//         7,871 us sys
// footek_test on freebsdarm.test.      502 µs   16'446 µs 7'051'545 µs

// x86 fleet
// with futexes
// 30 threads / 100000 iterations
//
//       146,015 us real
//       169,427 us user
//        68,939 us sys
// footek_test on rhel7.test.           376 µs    2'259 µs   153'024 µs
//       144,917 us real
//       383,317 us user
//       191,203 us sys
// footek_test on xnu.test.          11'143 µs    9'159 µs   164'865 µs
//       244,286 us real
//       405,395 us user
//       956,122 us sys
// footek_test on freebsd.test.         394 µs    2'165 µs   256'227 µs
//       209,095 us real
//       616,634 us user
//         9,945 us sys
// footek_test on netbsd.test.          502 µs    2'020 µs   261'895 µs
//       344,876 us real
//        50,000 us user
//     1,240,000 us sys
// footek_test on openbsd.test.         457 µs    2'737 µs   396'342 µs
//     1,193,906 us real
//    17,546,875 us user
//     3,000,000 us sys
// footek_test on win10.test.           462 µs   59'528 µs 1'348'265 µs

// x86 fleet
// without futexes
// 30 threads / 100000 iterations
//
//       897,815 us real
//     1,763,705 us user
//         9,696 us sys
// footek_test on rhel7.test.           423 µs    2'638 µs   912'241 µs
//       790,332 us real
//     2,359,967 us user
//             0 us sys
// footek_test on netbsd.test.        1'151 µs    2'634 µs 1'014'867 µs
//     2,332,724 us real
//     9,150,000 us user
//        10,000 us sys
// footek_test on openbsd.test.         557 µs    3'020 µs 2'554'648 µs
//     2,528,863 us real
//    56,546,875 us user
//     1,671,875 us sys
// footek_test on win10.test.           962 µs    9'698 µs 2'751'905 µs
//     2,916,033 us real
//    17,236,103 us user
//             0 us sys
// footek_test on freebsd.test.         690 µs    3'011 µs 2'925'997 µs
//     4,225,726 us real
//    16,679,456 us user
//        16,265 us sys
// footek_test on xnu.test.          98'468 µs    5'242 µs 5'191'724 µs

#define USE_FUTEX 1
#define THREADS 30
#define ITERATIONS 30000

#define MUTEX_LOCKED(word)      ((word) & 8)
#define MUTEX_WAITING(word)     ((word) & 16)

#define MUTEX_LOCK(word)        ((word) | 8)
#define MUTEX_SET_WAITING(word) ((word) | 16)
#define MUTEX_UNLOCK(word)      ((word) & ~(8 | 16))

void lock(atomic_int *futex) {
  int word, cs;
  for (int i = 0; i < 4; ++i) {
    word = 0;
    if (atomic_compare_exchange_strong_explicit(
            futex, &word, 1, memory_order_acquire, memory_order_acquire))
      return;
    pthread_pause_np();
  }
  if (word == 1)
    word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
  while (word > 0) {
    pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
#if USE_FUTEX
    nsync_futex_wait_(futex, 2, 0, 0);
#endif
    pthread_setcancelstate(cs, 0);
    word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
  }
}

void unlock(atomic_int *futex) {
  int word = atomic_fetch_sub_explicit(futex, 1, memory_order_release);
  if (word == 2) {
    atomic_store_explicit(futex, 0, memory_order_release);
#if USE_FUTEX
    nsync_futex_wake_(futex, 1, 0);
#endif
  }
}

int g_chores;
atomic_int g_lock;
pthread_mutex_t g_locker;

void *worker(void *arg) {
  for (int i = 0; i < ITERATIONS; ++i) {
    lock(&g_lock);
    ++g_chores;
    unlock(&g_lock);
  }
  return 0;
}

int main() {
  struct timeval start;
  gettimeofday(&start, 0);

  pthread_t th[THREADS];
  for (int i = 0; i < THREADS; ++i)
    pthread_create(&th[i], 0, worker, 0);
  for (int i = 0; i < THREADS; ++i)
    pthread_join(th[i], 0);
  npassert(g_chores == THREADS * ITERATIONS);

  struct rusage ru;
  struct timeval end;
  gettimeofday(&end, 0);
  getrusage(RUSAGE_SELF, &ru);
  printf("%,16ld us real\n"
         "%,16ld us user\n"
         "%,16ld us sys\n",
         timeval_tomicros(timeval_sub(end, start)),  //
         timeval_tomicros(ru.ru_utime),              //
         timeval_tomicros(ru.ru_stime));

  CheckForMemoryLeaks();
}

// COMPARE ULRICH DREPPER'S LOCKING ALGORITHM WITH MIKE BURROWS *NSYNC
// WHICH IS WHAT COSMOPOLITAN LIBC USES FOR YOUR POSIX THREADS MUTEXES

// x86 fleet
// with pthread_mutex_t
// 30 threads / 100000 iterations
//
//       186,976 us real
//        43,609 us user
//       205,585 us sys
// footek_test on freebsd.test.         410 µs    2'054 µs   195'339 µs
//       238,902 us real
//       235,743 us user
//        97,881 us sys
// footek_test on rhel7.test.           343 µs    2'339 µs   246'926 µs
//       201,285 us real
//       249,612 us user
//       141,230 us sys
// footek_test on xnu.test.           1'960 µs    5'350 µs   265'758 µs
//       303,363 us real
//        60,000 us user
//       410,000 us sys
// footek_test on openbsd.test.         545 µs    3'023 µs   326'200 µs
//       386,085 us real
//       586,455 us user
//       466,991 us sys
// footek_test on netbsd.test.          344 µs    2'421 µs   413'440 µs
//       245,010 us real
//       437,500 us user
//       140,625 us sys
// footek_test on win10.test.           300 µs   18'574 µs   441'225 µs

// arm fleet
// with pthread_mutex_t
// 30 threads / 100000 iterations
//
//        87,132 us real
//       183,517 us user
//        20,020 us sys
// footek_test on studio.test.          560 µs   12'418 µs    92'825 µs
//       679,374 us real
//       957,678 us user
//       605,078 us sys
// footek_test on pi.test.              462 µs   16'574 µs   702'833 µs
//       902,343 us real
//     1,459,706 us user
//       781,140 us sys
// footek_test on freebsdarm.test.      400 µs   16'261 µs   970'022 µs
third_party/nsync/common.c (vendored, 7 changes)
@@ -37,6 +37,7 @@
 #include "third_party/nsync/atomic.internal.h"
 #include "third_party/nsync/common.internal.h"
 #include "third_party/nsync/mu_semaphore.h"
+#include "third_party/nsync/mu_semaphore.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
 __static_yoink("nsync_notice");

@@ -147,9 +148,9 @@ static void free_waiters_push (waiter *w) {

 static void free_waiters_populate (void) {
 	int n;
-	if (IsNetbsd () || IsXnuSilicon ()) {
-		// netbsd needs one file descriptor per semaphore (!!)
-		// tim cook wants us to use his grand central dispatch
+	if (IsNetbsd () || (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ())) {
+		// netbsd needs a real file descriptor per semaphore
+		// tim cook wants us to use his lol central dispatch
 		n = 1;
 	} else {
 		n = getpagesize() / sizeof(waiter);
third_party/nsync/futex.c (vendored, 5 changes)
@@ -52,6 +52,7 @@
 #include "third_party/nsync/atomic.h"
 #include "third_party/nsync/common.internal.h"
 #include "third_party/nsync/futex.internal.h"
+#include "libc/intrin/kprintf.h"
 #include "third_party/nsync/time.h"

 #define FUTEX_WAIT_BITS_ FUTEX_BITSET_MATCH_ANY

@@ -138,7 +139,7 @@ static int nsync_futex_polyfill_ (atomic_int *w, int expect, struct timespec *abstime) {
 	}
 	if (_weaken (pthread_testcancel_np) &&
 	    _weaken (pthread_testcancel_np) ()) {
-		return -ETIMEDOUT;
+		return -ECANCELED;
 	}
 	if (abstime && timespec_cmp (timespec_real (), *abstime) >= 0) {
 		return -ETIMEDOUT;

@@ -163,7 +164,7 @@ static int nsync_futex_wait_win32_ (atomic_int *w, int expect, char pshare,

 	for (;;) {
 		now = timespec_real ();
-		if (timespec_cmp (now, deadline) > 0) {
+		if (timespec_cmp (now, deadline) >= 0) {
 			return etimedout();
 		}
 		wait = timespec_sub (deadline, now);
third_party/nsync/mu_semaphore.c (vendored, 13 changes)
@@ -21,14 +21,9 @@
 #include "third_party/nsync/mu_semaphore.internal.h"
 __static_yoink("nsync_notice");

-/* Apple's ulock (part by Cosmo futexes) is an internal API, but:
-   1. Unlike GCD it's cancellable, i.e. can be EINTR'd by signals
-   2. We currently always use ulock anyway for joining threads */
-#define PREFER_GCD_OVER_ULOCK 1
-
 /* Initialize *s; the initial value is 0. */
 bool nsync_mu_semaphore_init (nsync_semaphore *s) {
-	if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
+	if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
 		return nsync_mu_semaphore_init_gcd (s);
 	} else if (IsNetbsd ()) {
 		return nsync_mu_semaphore_init_sem (s);

@@ -44,7 +39,7 @@ bool nsync_mu_semaphore_init (nsync_semaphore *s) {
 errno_t nsync_mu_semaphore_p (nsync_semaphore *s) {
 	errno_t err;
 	BEGIN_CANCELATION_POINT;
-	if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
+	if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
 		err = nsync_mu_semaphore_p_gcd (s);
 	} else if (IsNetbsd ()) {
 		err = nsync_mu_semaphore_p_sem (s);

@@ -62,7 +57,7 @@ errno_t nsync_mu_semaphore_p (nsync_semaphore *s) {
 errno_t nsync_mu_semaphore_p_with_deadline (nsync_semaphore *s, nsync_time abs_deadline) {
 	errno_t err;
 	BEGIN_CANCELATION_POINT;
-	if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
+	if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
 		err = nsync_mu_semaphore_p_with_deadline_gcd (s, abs_deadline);
 	} else if (IsNetbsd ()) {
 		err = nsync_mu_semaphore_p_with_deadline_sem (s, abs_deadline);

@@ -75,7 +70,7 @@ errno_t nsync_mu_semaphore_p_with_deadline (nsync_semaphore *s, nsync_time abs_deadline) {

 /* Ensure that the count of *s is at least 1. */
 void nsync_mu_semaphore_v (nsync_semaphore *s) {
-	if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
+	if (NSYNC_USE_GRAND_CENTRAL && IsXnuSilicon ()) {
 		return nsync_mu_semaphore_v_gcd (s);
 	} else if (IsNetbsd ()) {
 		return nsync_mu_semaphore_v_sem (s);
third_party/nsync/mu_semaphore.internal.h (vendored, 14 changes)
@@ -4,6 +4,20 @@
 #include "third_party/nsync/time.h"
 COSMOPOLITAN_C_START_

+/* XNU ulock (used by cosmo futexes) is an internal API, however:
+
+   1. Unlike GCD it's cancelable i.e. can be EINTR'd by signals
+   2. We have no choice but to use ulock for joining threads
+   3. Grand Central Dispatch requires a busy loop workaround
+   4. ulock makes our mutexes use 20% more system time (meh)
+   5. ulock makes our mutexes use 40% less wall time (good)
+   6. ulock makes our mutexes use 64% less user time (woop)
+
+   ulock is an outstanding system call that must be used.
+   gcd is not an acceptable alternative to ulock. */
+
+#define NSYNC_USE_GRAND_CENTRAL 0
+
 bool nsync_mu_semaphore_init_futex(nsync_semaphore *);
 errno_t nsync_mu_semaphore_p_futex(nsync_semaphore *);
 errno_t nsync_mu_semaphore_p_with_deadline_futex(nsync_semaphore *, nsync_time);