mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 06:53:33 +00:00
Make Cosmo mutexes competitive with Apple Libc
While we have always licked glibc and musl libc on gnu/systemd, sadly the Apple Libc implementation of pthread_mutex_t is better than ours. It may be due to how the XNU kernel and M2 microprocessor are in league when it comes to scheduling processes, such that the NSYNC behavior is being penalized. We can solve this by leaning more heavily on ulock using Drepper's algo. It's kind of ironic that Linux's official mutexes work terribly on Linux but almost as well as Apple Libc if used on MacOS.
This commit is contained in:
parent
2ec413b5a9
commit
90460ceb3c
13 changed files with 349 additions and 202 deletions
2
Makefile
2
Makefile
|
@ -428,7 +428,7 @@ HTAGS: o/$(MODE)/hdrs-old.txt $(filter-out third_party/libcxx/%,$(HDRS)) #o/$(MO
|
||||||
|
|
||||||
loc: private .UNSANDBOXED = 1
|
loc: private .UNSANDBOXED = 1
|
||||||
loc: o/$(MODE)/tool/build/summy
|
loc: o/$(MODE)/tool/build/summy
|
||||||
find -name \*.h -or -name \*.c -or -name \*.S | \
|
find -name \*.h -or -name \*.hpp -or -name \*.c -or -name \*.cc -or -name \*.cpp -or -name \*.S -or -name \*.mk | \
|
||||||
$(XARGS) wc -l | grep total | awk '{print $$1}' | $<
|
$(XARGS) wc -l | grep total | awk '{print $$1}' | $<
|
||||||
|
|
||||||
# PLEASE: MAINTAIN TOPOLOGICAL ORDER
|
# PLEASE: MAINTAIN TOPOLOGICAL ORDER
|
||||||
|
|
|
@ -33,13 +33,16 @@
|
||||||
|
|
||||||
static void pthread_mutex_lock_spin(atomic_int *word) {
|
static void pthread_mutex_lock_spin(atomic_int *word) {
|
||||||
int backoff = 0;
|
int backoff = 0;
|
||||||
for (;;) {
|
if (atomic_exchange_explicit(word, 1, memory_order_acquire)) {
|
||||||
if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
|
LOCKTRACE("acquiring pthread_mutex_lock_spin(%t)...", word);
|
||||||
break;
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (!atomic_load_explicit(word, memory_order_relaxed))
|
for (;;) {
|
||||||
|
if (!atomic_load_explicit(word, memory_order_relaxed))
|
||||||
|
break;
|
||||||
|
backoff = pthread_delay_np(word, backoff);
|
||||||
|
}
|
||||||
|
if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
|
||||||
break;
|
break;
|
||||||
backoff = pthread_delay_np(word, backoff);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -47,14 +50,11 @@ static void pthread_mutex_lock_spin(atomic_int *word) {
|
||||||
// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
|
// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
|
||||||
// slightly improved to attempt acquiring multiple times b4 syscall
|
// slightly improved to attempt acquiring multiple times b4 syscall
|
||||||
static void pthread_mutex_lock_drepper(atomic_int *futex, char pshare) {
|
static void pthread_mutex_lock_drepper(atomic_int *futex, char pshare) {
|
||||||
int word;
|
int word = 0;
|
||||||
for (int i = 0; i < 4; ++i) {
|
if (atomic_compare_exchange_strong_explicit(
|
||||||
word = 0;
|
futex, &word, 1, memory_order_acquire, memory_order_acquire))
|
||||||
if (atomic_compare_exchange_strong_explicit(
|
return;
|
||||||
futex, &word, 1, memory_order_acquire, memory_order_acquire))
|
LOCKTRACE("acquiring pthread_mutex_lock_drepper(%t)...", futex);
|
||||||
return;
|
|
||||||
pthread_pause_np();
|
|
||||||
}
|
|
||||||
if (word == 1)
|
if (word == 1)
|
||||||
word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
|
word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
|
||||||
while (word > 0) {
|
while (word > 0) {
|
||||||
|
@ -70,6 +70,7 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
|
||||||
uint64_t lock;
|
uint64_t lock;
|
||||||
int backoff = 0;
|
int backoff = 0;
|
||||||
int me = gettid();
|
int me = gettid();
|
||||||
|
bool once = false;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (MUTEX_OWNER(word) == me) {
|
if (MUTEX_OWNER(word) == me) {
|
||||||
if (MUTEX_TYPE(word) != PTHREAD_MUTEX_ERRORCHECK) {
|
if (MUTEX_TYPE(word) != PTHREAD_MUTEX_ERRORCHECK) {
|
||||||
|
@ -95,6 +96,10 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
|
||||||
mutex->_pid = __pid;
|
mutex->_pid = __pid;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
if (!once) {
|
||||||
|
LOCKTRACE("acquiring pthread_mutex_lock_recursive(%t)...", mutex);
|
||||||
|
once = true;
|
||||||
|
}
|
||||||
for (;;) {
|
for (;;) {
|
||||||
word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
|
word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
|
||||||
if (MUTEX_OWNER(word) == me)
|
if (MUTEX_OWNER(word) == me)
|
||||||
|
@ -117,8 +122,12 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
|
||||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL && //
|
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL && //
|
||||||
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
||||||
_weaken(nsync_mu_lock)) {
|
_weaken(nsync_mu_lock)) {
|
||||||
_weaken(nsync_mu_lock)((nsync_mu *)mutex);
|
// on apple silicon we should just put our faith in ulock
|
||||||
return 0;
|
// otherwise *nsync gets struck down by the eye of sauron
|
||||||
|
if (!IsXnuSilicon()) {
|
||||||
|
_weaken(nsync_mu_lock)((nsync_mu *)mutex);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -169,15 +178,26 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
|
||||||
*
|
*
|
||||||
* This function does nothing in vfork() children.
|
* This function does nothing in vfork() children.
|
||||||
*
|
*
|
||||||
|
* You can debug the acquisition of locks by building your program
|
||||||
|
* with `cosmocc -mdbg` and passing the `--strace` flag to your program.
|
||||||
|
* This will cause a line to be logged each time a mutex or spin lock is
|
||||||
|
* locked or unlocked. When locking, this is printed after the lock gets
|
||||||
|
* acquired. The entry to the lock operation will be logged too but only
|
||||||
|
* if the lock couldn't be immediately acquired. Lock logging works best
|
||||||
|
* when `mutex` refers to a static variable, in which case its name will
|
||||||
|
* be printed in the log.
|
||||||
|
*
|
||||||
* @return 0 on success, or error number on failure
|
* @return 0 on success, or error number on failure
|
||||||
* @see pthread_spin_lock()
|
* @see pthread_spin_lock()
|
||||||
* @vforksafe
|
* @vforksafe
|
||||||
*/
|
*/
|
||||||
errno_t pthread_mutex_lock(pthread_mutex_t *mutex) {
|
errno_t pthread_mutex_lock(pthread_mutex_t *mutex) {
|
||||||
if (__vforked)
|
if (!__vforked) {
|
||||||
|
errno_t err = pthread_mutex_lock_impl(mutex);
|
||||||
|
LOCKTRACE("pthread_mutex_lock(%t) → %s", mutex, DescribeErrno(err));
|
||||||
|
return err;
|
||||||
|
} else {
|
||||||
|
LOCKTRACE("skipping pthread_mutex_lock(%t) due to vfork", mutex);
|
||||||
return 0;
|
return 0;
|
||||||
LOCKTRACE("acquiring %t...", mutex);
|
}
|
||||||
errno_t err = pthread_mutex_lock_impl(mutex);
|
|
||||||
LOCKTRACE("pthread_mutex_lock(%t) → %s", mutex, DescribeErrno(err));
|
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,10 +97,14 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
|
||||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&
|
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL &&
|
||||||
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
||||||
_weaken(nsync_mu_trylock)) {
|
_weaken(nsync_mu_trylock)) {
|
||||||
if (_weaken(nsync_mu_trylock)((nsync_mu *)mutex)) {
|
// on apple silicon we should just put our faith in ulock
|
||||||
return 0;
|
// otherwise *nsync gets struck down by the eye of sauron
|
||||||
} else {
|
if (!IsXnuSilicon()) {
|
||||||
return EBUSY;
|
if (_weaken(nsync_mu_trylock)((nsync_mu *)mutex)) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return EBUSY;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -91,8 +91,12 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
|
||||||
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL && //
|
if (MUTEX_TYPE(word) == PTHREAD_MUTEX_NORMAL && //
|
||||||
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE && //
|
||||||
_weaken(nsync_mu_unlock)) {
|
_weaken(nsync_mu_unlock)) {
|
||||||
_weaken(nsync_mu_unlock)((nsync_mu *)mutex);
|
// on apple silicon we should just put our faith in ulock
|
||||||
return 0;
|
// otherwise *nsync gets struck down by the eye of sauron
|
||||||
|
if (!IsXnuSilicon()) {
|
||||||
|
_weaken(nsync_mu_unlock)((nsync_mu *)mutex);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/atomic.h"
|
#include "libc/intrin/atomic.h"
|
||||||
|
#include "libc/intrin/strace.h"
|
||||||
#include "libc/thread/thread.h"
|
#include "libc/thread/thread.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -29,8 +30,17 @@
|
||||||
* pthread_spin_unlock(&lock);
|
* pthread_spin_unlock(&lock);
|
||||||
* pthread_spin_destroy(&lock);
|
* pthread_spin_destroy(&lock);
|
||||||
*
|
*
|
||||||
* This function has undefined behavior when `spin` wasn't intialized,
|
* This function has undefined behavior when `spin` wasn't initialized or
|
||||||
* was destroyed, or if the lock's already held by the calling thread.
|
* was destroyed, or if the lock is already held by the calling thread.
|
||||||
|
*
|
||||||
|
* You can debug the acquisition of locks by building your program
|
||||||
|
* with `cosmocc -mdbg` and passing the `--strace` flag to your program.
|
||||||
|
* This will cause a line to be logged each time a mutex or spin lock is
|
||||||
|
* locked or unlocked. When locking, this is printed after the lock gets
|
||||||
|
* acquired. The entry to the lock operation will be logged too but only
|
||||||
|
* if the lock couldn't be immediately acquired. Lock logging works best
|
||||||
|
* when `spin` refers to a static variable, in which case its name will
|
||||||
|
* be printed in the log.
|
||||||
*
|
*
|
||||||
* @return 0 on success, or errno on error
|
* @return 0 on success, or errno on error
|
||||||
* @see pthread_spin_trylock
|
* @see pthread_spin_trylock
|
||||||
|
@ -38,12 +48,16 @@
|
||||||
* @see pthread_spin_init
|
* @see pthread_spin_init
|
||||||
*/
|
*/
|
||||||
errno_t pthread_spin_lock(pthread_spinlock_t *spin) {
|
errno_t pthread_spin_lock(pthread_spinlock_t *spin) {
|
||||||
for (;;) {
|
if (atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire)) {
|
||||||
if (!atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire))
|
LOCKTRACE("acquiring pthread_spin_lock(%t)...", spin);
|
||||||
break;
|
for (;;) {
|
||||||
for (;;)
|
for (;;)
|
||||||
if (!atomic_load_explicit(&spin->_lock, memory_order_relaxed))
|
if (!atomic_load_explicit(&spin->_lock, memory_order_relaxed))
|
||||||
|
break;
|
||||||
|
if (!atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire))
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
LOCKTRACE("pthread_spin_lock(%t)", spin);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/atomic.h"
|
#include "libc/intrin/atomic.h"
|
||||||
|
#include "libc/intrin/strace.h"
|
||||||
#include "libc/thread/thread.h"
|
#include "libc/thread/thread.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -29,6 +30,7 @@
|
||||||
* @see pthread_spin_lock
|
* @see pthread_spin_lock
|
||||||
*/
|
*/
|
||||||
errno_t pthread_spin_unlock(pthread_spinlock_t *spin) {
|
errno_t pthread_spin_unlock(pthread_spinlock_t *spin) {
|
||||||
|
LOCKTRACE("pthread_spin_unlock(%t)", spin);
|
||||||
atomic_store_explicit(&spin->_lock, 0, memory_order_release);
|
atomic_store_explicit(&spin->_lock, 0, memory_order_release);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,13 +5,19 @@
|
||||||
#define SYSDEBUG 0
|
#define SYSDEBUG 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define _NTTRACE 0 /* not configurable w/ flag yet */
|
#ifdef MODE_DBG
|
||||||
#define _POLLTRACE 0 /* not configurable w/ flag yet */
|
#define _STRACE_VERBOSE 1
|
||||||
#define _DATATRACE 1 /* not configurable w/ flag yet */
|
#else
|
||||||
#define _LOCKTRACE 0 /* not configurable w/ flag yet */
|
#define _STRACE_VERBOSE 0
|
||||||
#define _STDIOTRACE 0 /* not configurable w/ flag yet */
|
#endif
|
||||||
#define _KERNTRACE 0 /* not configurable w/ flag yet */
|
|
||||||
#define _TIMETRACE 0 /* not configurable w/ flag yet */
|
#define _NTTRACE _STRACE_VERBOSE /* not configurable w/ flag yet */
|
||||||
|
#define _KERNTRACE _STRACE_VERBOSE /* not configurable w/ flag yet */
|
||||||
|
#define _POLLTRACE _STRACE_VERBOSE /* not configurable w/ flag yet */
|
||||||
|
#define _LOCKTRACE _STRACE_VERBOSE /* not configurable w/ flag yet */
|
||||||
|
#define _DATATRACE 1 /* not configurable w/ flag yet */
|
||||||
|
#define _STDIOTRACE 0 /* not configurable w/ flag yet */
|
||||||
|
#define _TIMETRACE 0 /* not configurable w/ flag yet */
|
||||||
|
|
||||||
#define STRACE_PROLOGUE "%rSYS %6P %6H %'18T "
|
#define STRACE_PROLOGUE "%rSYS %6P %6H %'18T "
|
||||||
|
|
||||||
|
@ -30,9 +36,10 @@ COSMOPOLITAN_C_START_
|
||||||
((void)(SYSDEBUG && _POLLTRACE && strace_enabled(0) > 0 && \
|
((void)(SYSDEBUG && _POLLTRACE && strace_enabled(0) > 0 && \
|
||||||
(__stracef(STRACE_PROLOGUE FMT "\n", ##__VA_ARGS__), 0)))
|
(__stracef(STRACE_PROLOGUE FMT "\n", ##__VA_ARGS__), 0)))
|
||||||
|
|
||||||
#define KERNTRACE(FMT, ...) \
|
#define KERNTRACE(FMT, ...) \
|
||||||
((void)(SYSDEBUG && _KERNTRACE && strace_enabled(0) > 0 && \
|
((void)(SYSDEBUG && _KERNTRACE && strace_enabled(0) > 0 && \
|
||||||
(__stracef(STRACE_PROLOGUE FMT "\n", ##__VA_ARGS__), 0)))
|
(__stracef(STRACE_PROLOGUE "\e[2m" FMT "\e[0m\n", ##__VA_ARGS__), \
|
||||||
|
0)))
|
||||||
|
|
||||||
#define STDIOTRACE(FMT, ...) \
|
#define STDIOTRACE(FMT, ...) \
|
||||||
((void)(SYSDEBUG && _STDIOTRACE && strace_enabled(0) > 0 && \
|
((void)(SYSDEBUG && _STDIOTRACE && strace_enabled(0) > 0 && \
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/dce.h"
|
||||||
#include "libc/intrin/atomic.h"
|
#include "libc/intrin/atomic.h"
|
||||||
#include "libc/limits.h"
|
#include "libc/limits.h"
|
||||||
#include "libc/thread/thread.h"
|
#include "libc/thread/thread.h"
|
||||||
|
@ -44,7 +45,7 @@ errno_t pthread_cond_broadcast(pthread_cond_t *cond) {
|
||||||
#if PTHREAD_USE_NSYNC
|
#if PTHREAD_USE_NSYNC
|
||||||
// favor *NSYNC if this is a process private condition variable
|
// favor *NSYNC if this is a process private condition variable
|
||||||
// if using Mike Burrows' code isn't possible, use a naive impl
|
// if using Mike Burrows' code isn't possible, use a naive impl
|
||||||
if (!cond->_pshared) {
|
if (!cond->_pshared && !IsXnuSilicon()) {
|
||||||
nsync_cv_broadcast((nsync_cv *)cond);
|
nsync_cv_broadcast((nsync_cv *)cond);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/dce.h"
|
||||||
#include "libc/intrin/atomic.h"
|
#include "libc/intrin/atomic.h"
|
||||||
#include "libc/thread/thread.h"
|
#include "libc/thread/thread.h"
|
||||||
#include "third_party/nsync/cv.h"
|
#include "third_party/nsync/cv.h"
|
||||||
|
@ -43,7 +44,7 @@ errno_t pthread_cond_signal(pthread_cond_t *cond) {
|
||||||
#if PTHREAD_USE_NSYNC
|
#if PTHREAD_USE_NSYNC
|
||||||
// favor *NSYNC if this is a process private condition variable
|
// favor *NSYNC if this is a process private condition variable
|
||||||
// if using Mike Burrows' code isn't possible, use a naive impl
|
// if using Mike Burrows' code isn't possible, use a naive impl
|
||||||
if (!cond->_pshared) {
|
if (!cond->_pshared && !IsXnuSilicon()) {
|
||||||
nsync_cv_signal((nsync_cv *)cond);
|
nsync_cv_signal((nsync_cv *)cond);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/calls/calls.h"
|
#include "libc/calls/calls.h"
|
||||||
#include "libc/calls/cp.internal.h"
|
#include "libc/calls/cp.internal.h"
|
||||||
|
#include "libc/dce.h"
|
||||||
#include "libc/errno.h"
|
#include "libc/errno.h"
|
||||||
#include "libc/thread/lock.h"
|
#include "libc/thread/lock.h"
|
||||||
#include "libc/thread/posixthread.internal.h"
|
#include "libc/thread/posixthread.internal.h"
|
||||||
|
@ -122,7 +123,7 @@ errno_t pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
|
||||||
#if PTHREAD_USE_NSYNC
|
#if PTHREAD_USE_NSYNC
|
||||||
// favor *NSYNC if this is a process private condition variable
|
// favor *NSYNC if this is a process private condition variable
|
||||||
// if using Mike Burrows' code isn't possible, use a naive impl
|
// if using Mike Burrows' code isn't possible, use a naive impl
|
||||||
if (!cond->_pshared) {
|
if (!cond->_pshared && !IsXnuSilicon()) {
|
||||||
err = nsync_cv_wait_with_deadline(
|
err = nsync_cv_wait_with_deadline(
|
||||||
(nsync_cv *)cond, (nsync_mu *)mutex,
|
(nsync_cv *)cond, (nsync_mu *)mutex,
|
||||||
abstime ? *abstime : nsync_time_no_deadline, 0);
|
abstime ? *abstime : nsync_time_no_deadline, 0);
|
||||||
|
|
|
@ -62,7 +62,7 @@ TEST(sched_getcpu, affinity_test) {
|
||||||
// KLUDGE TEST
|
// KLUDGE TEST
|
||||||
|
|
||||||
#define THREADS 2
|
#define THREADS 2
|
||||||
#define ITERATIONS 10000
|
#define ITERATIONS 100000
|
||||||
|
|
||||||
int g_hits[256];
|
int g_hits[256];
|
||||||
atomic_int g_sync;
|
atomic_int g_sync;
|
||||||
|
|
|
@ -1,119 +1,245 @@
|
||||||
#include <assert.h>
|
// config
|
||||||
|
#define USE POSIX
|
||||||
|
#define ITERATIONS 50000
|
||||||
|
#define THREADS 10
|
||||||
|
|
||||||
|
// USE may be
|
||||||
|
#define SPIN 1
|
||||||
|
#define FUTEX 2
|
||||||
|
#define POSIX 3
|
||||||
|
|
||||||
|
#ifdef __COSMOPOLITAN__
|
||||||
#include <cosmo.h>
|
#include <cosmo.h>
|
||||||
#include <linux/futex.h>
|
#include "third_party/nsync/futex.internal.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <stdatomic.h>
|
#include <stdatomic.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
#include <sys/syscall.h>
|
#include <sys/time.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include "third_party/nsync/futex.internal.h"
|
|
||||||
|
|
||||||
// arm fleet
|
#ifdef __linux__
|
||||||
// with futexes
|
#include <linux/futex.h>
|
||||||
// 30 threads / 100000 iterations
|
#include <sys/syscall.h>
|
||||||
//
|
static inline long nsync_futex_wait_(atomic_int *uaddr, int val, char pshare,
|
||||||
// 46,481 us real
|
const struct timespec *timeout) {
|
||||||
// 68,745 us user
|
return syscall(SYS_futex, uaddr, pshare ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE,
|
||||||
// 586,871 us sys
|
val, timeout, NULL, 0);
|
||||||
// footek_test on studio.test. 585 µs 13'597 µs 57'473 µs
|
}
|
||||||
// 389,619 us real
|
static inline long nsync_futex_wake_(atomic_int *uaddr, int num_to_wake,
|
||||||
// 839,848 us user
|
char pshare) {
|
||||||
// 679,112 us sys
|
return syscall(SYS_futex, uaddr, pshare ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE,
|
||||||
// footek_test on pi5.test. 335 µs 13'034 µs 432'358 µs
|
num_to_wake, NULL, NULL, 0);
|
||||||
// 463,799 us real
|
}
|
||||||
// 1,259,267 us user
|
#endif
|
||||||
// 547,681 us sys
|
|
||||||
// footek_test on pi.test. 479 µs 16'539 µs 476'395 µs
|
|
||||||
// 1,256,134 us real
|
|
||||||
// 3,770,473 us user
|
|
||||||
// 1,214,755 us sys
|
|
||||||
// footek_test on freebsdarm.test. 364 µs 16'898 µs 1'288'594 µs
|
|
||||||
|
|
||||||
// arm fleet
|
|
||||||
// without futexes
|
|
||||||
// 30 threads / 100000 iterations
|
|
||||||
//
|
|
||||||
// 1,282,084 us real
|
|
||||||
// 29,359,582 us user
|
|
||||||
// 34,553 us sys
|
|
||||||
// footek_test on studio.test. 961 µs 12'907 µs 1'287'983 µs
|
|
||||||
// 4,070,988 us real
|
|
||||||
// 16,203,990 us user
|
|
||||||
// 7,999 us sys
|
|
||||||
// footek_test on pi.test. 459 µs 16'376 µs 4'095'512 µs
|
|
||||||
// 7,012,493 us real
|
|
||||||
// 27,936,725 us user
|
|
||||||
// 7,871 us sys
|
|
||||||
// footek_test on freebsdarm.test. 502 µs 16'446 µs 7'051'545 µs
|
|
||||||
|
|
||||||
// x86 fleet
|
// x86 fleet
|
||||||
// with futexes
|
// with spin lock
|
||||||
// 30 threads / 100000 iterations
|
// 30 threads / 100000 iterations
|
||||||
//
|
//
|
||||||
// 146,015 us real
|
// footek_test on Linux 6.8 AMD Ryzen Threadripper PRO 7995WX
|
||||||
// 169,427 us user
|
// 1,570,224 us real
|
||||||
// 68,939 us sys
|
// 42,690,880 us user
|
||||||
// footek_test on rhel7.test. 376 µs 2'259 µs 153'024 µs
|
// 1,999 us sys
|
||||||
// 144,917 us real
|
|
||||||
// 383,317 us user
|
|
||||||
// 191,203 us sys
|
|
||||||
// footek_test on xnu.test. 11'143 µs 9'159 µs 164'865 µs
|
|
||||||
// 244,286 us real
|
|
||||||
// 405,395 us user
|
|
||||||
// 956,122 us sys
|
|
||||||
// footek_test on freebsd.test. 394 µs 2'165 µs 256'227 µs
|
|
||||||
// 209,095 us real
|
|
||||||
// 616,634 us user
|
|
||||||
// 9,945 us sys
|
|
||||||
// footek_test on netbsd.test. 502 µs 2'020 µs 261'895 µs
|
|
||||||
// 344,876 us real
|
|
||||||
// 50,000 us user
|
|
||||||
// 1,240,000 us sys
|
|
||||||
// footek_test on openbsd.test. 457 µs 2'737 µs 396'342 µs
|
|
||||||
// 1,193,906 us real
|
|
||||||
// 17,546,875 us user
|
|
||||||
// 3,000,000 us sys
|
|
||||||
// footek_test on win10.test. 462 µs 59'528 µs 1'348'265 µs
|
|
||||||
|
|
||||||
// x86 fleet
|
|
||||||
// without futexes
|
|
||||||
// 30 threads / 100000 iterations
|
|
||||||
//
|
//
|
||||||
|
// footek_test on rhel7.test. 423 µs 2'638 µs 912'241 µs
|
||||||
// 897,815 us real
|
// 897,815 us real
|
||||||
// 1,763,705 us user
|
// 1,763,705 us user
|
||||||
// 9,696 us sys
|
// 9,696 us sys
|
||||||
// footek_test on rhel7.test. 423 µs 2'638 µs 912'241 µs
|
//
|
||||||
// 790,332 us real
|
// footek_test on xnu.test. 98'468 µs 5'242 µs 5'191'724 µs
|
||||||
// 2,359,967 us user
|
|
||||||
// 0 us sys
|
|
||||||
// footek_test on netbsd.test. 1'151 µs 2'634 µs 1'014'867 µs
|
|
||||||
// 2,332,724 us real
|
|
||||||
// 9,150,000 us user
|
|
||||||
// 10,000 us sys
|
|
||||||
// footek_test on openbsd.test. 557 µs 3'020 µs 2'554'648 µs
|
|
||||||
// 2,528,863 us real
|
|
||||||
// 56,546,875 us user
|
|
||||||
// 1,671,875 us sys
|
|
||||||
// footek_test on win10.test. 962 µs 9'698 µs 2'751'905 µs
|
|
||||||
// 2,916,033 us real
|
|
||||||
// 17,236,103 us user
|
|
||||||
// 0 us sys
|
|
||||||
// footek_test on freebsd.test. 690 µs 3'011 µs 2'925'997 µs
|
|
||||||
// 4,225,726 us real
|
// 4,225,726 us real
|
||||||
// 16,679,456 us user
|
// 16,679,456 us user
|
||||||
// 16,265 us sys
|
// 16,265 us sys
|
||||||
// footek_test on xnu.test. 98'468 µs 5'242 µs 5'191'724 µs
|
//
|
||||||
|
// footek_test on freebsd.test. 690 µs 3'011 µs 2'925'997 µs
|
||||||
|
// 2,916,033 us real
|
||||||
|
// 17,236,103 us user
|
||||||
|
// 0 us sys
|
||||||
|
//
|
||||||
|
// footek_test on netbsd.test. 1'151 µs 2'634 µs 1'014'867 µs
|
||||||
|
// 790,332 us real
|
||||||
|
// 2,359,967 us user
|
||||||
|
// 0 us sys
|
||||||
|
//
|
||||||
|
// footek_test on openbsd.test. 557 µs 3'020 µs 2'554'648 µs
|
||||||
|
// 2,332,724 us real
|
||||||
|
// 9,150,000 us user
|
||||||
|
// 10,000 us sys
|
||||||
|
//
|
||||||
|
// footek_test on win10.test. 962 µs 9'698 µs 2'751'905 µs
|
||||||
|
// 2,528,863 us real
|
||||||
|
// 56,546,875 us user
|
||||||
|
// 1,671,875 us sys
|
||||||
|
|
||||||
#define SPIN 1
|
// x86 fleet
|
||||||
#define FUTEX 2
|
// with futexes
|
||||||
#define NSYNC 3
|
// 30 threads / 100000 iterations
|
||||||
|
//
|
||||||
|
// footek_test on Linux 6.8 AMD Ryzen Threadripper PRO 7995WX
|
||||||
|
// 100,746 us real
|
||||||
|
// 234,451 us user
|
||||||
|
// 2,638,333 us sys
|
||||||
|
//
|
||||||
|
// footek_test on rhel7.test. 376 µs 2'259 µs 153'024 µs
|
||||||
|
// 146,015 us real
|
||||||
|
// 169,427 us user
|
||||||
|
// 68,939 us sys
|
||||||
|
//
|
||||||
|
// footek_test on xnu.test. 11'143 µs 9'159 µs 164'865 µs
|
||||||
|
// 144,917 us real
|
||||||
|
// 383,317 us user
|
||||||
|
// 191,203 us sys
|
||||||
|
//
|
||||||
|
// footek_test on freebsd.test. 394 µs 2'165 µs 256'227 µs
|
||||||
|
// 244,286 us real
|
||||||
|
// 405,395 us user
|
||||||
|
// 956,122 us sys
|
||||||
|
//
|
||||||
|
// footek_test on netbsd.test. 502 µs 2'020 µs 261'895 µs
|
||||||
|
// 209,095 us real
|
||||||
|
// 616,634 us user
|
||||||
|
// 9,945 us sys
|
||||||
|
//
|
||||||
|
// footek_test on openbsd.test. 457 µs 2'737 µs 396'342 µs
|
||||||
|
// 344,876 us real
|
||||||
|
// 50,000 us user
|
||||||
|
// 1,240,000 us sys
|
||||||
|
//
|
||||||
|
// footek_test on win10.test. 462 µs 59'528 µs 1'348'265 µs
|
||||||
|
// 1,193,906 us real
|
||||||
|
// 17,546,875 us user
|
||||||
|
// 3,000,000 us sys
|
||||||
|
|
||||||
#define USE NSYNC
|
// x86 fleet
|
||||||
|
// with posix
|
||||||
|
// 30 threads / 100000 iterations
|
||||||
|
//
|
||||||
|
// footek_test on Linux 6.8 AMD Ryzen Threadripper PRO 7995WX (glibc)
|
||||||
|
// 111,560 us real
|
||||||
|
// 153,985 us user
|
||||||
|
// 2,988,121 us sys
|
||||||
|
//
|
||||||
|
// footek_test on Linux 6.8 AMD Ryzen Threadripper PRO 7995WX (musl)
|
||||||
|
// 392,765 us real
|
||||||
|
// 1,885,558 us user
|
||||||
|
// 9,667,865 us sys
|
||||||
|
//
|
||||||
|
// footek_test on Linux 6.8 AMD Ryzen Threadripper PRO 7995WX (cosmo)
|
||||||
|
// 40,965 us real
|
||||||
|
// 47,168 us user
|
||||||
|
// 25,398 us sys
|
||||||
|
//
|
||||||
|
// footek_test on rhel7.test. 683 µs 1'340 µs 105'977 µs
|
||||||
|
// 101,934 us real
|
||||||
|
// 104,771 us user
|
||||||
|
// 4,068 us sys
|
||||||
|
//
|
||||||
|
// footek_test on xnu.test. 2'054 µs 5'352 µs 210'306 µs
|
||||||
|
// 181,540 us real
|
||||||
|
// 216,236 us user
|
||||||
|
// 127,344 us sys
|
||||||
|
//
|
||||||
|
// footek_test on freebsd.test. 613 µs 2'120 µs 133'272 µs
|
||||||
|
// 126,803 us real
|
||||||
|
// 3,100 us user
|
||||||
|
// 176,744 us sys
|
||||||
|
//
|
||||||
|
// footek_test on netbsd.test. 350 µs 3'570 µs 262'186 µs
|
||||||
|
// 199,882 us real
|
||||||
|
// 138,178 us user
|
||||||
|
// 329,501 us sys
|
||||||
|
//
|
||||||
|
// footek_test on openbsd.test. 454 µs 2'185 µs 153'258 µs
|
||||||
|
// 138,619 us real
|
||||||
|
// 30,000 us user
|
||||||
|
// 110,000 us sys
|
||||||
|
//
|
||||||
|
// footek_test on win10.test. 233 µs 6'133 µs 260'812 µs
|
||||||
|
// 156,382 us real
|
||||||
|
// 312,500 us user
|
||||||
|
// 31,250 us sys
|
||||||
|
|
||||||
#define THREADS 10
|
// arm fleet
|
||||||
#define ITERATIONS 50000
|
// with spin lock
|
||||||
|
// 30 threads / 100000 iterations
|
||||||
|
//
|
||||||
|
// footek_test on studio.test. 961 µs 12'907 µs 1'287'983 µs
|
||||||
|
// 1,282,084 us real
|
||||||
|
// 29,359,582 us user
|
||||||
|
// 34,553 us sys
|
||||||
|
//
|
||||||
|
// footek_test on pi.test. 459 µs 16'376 µs 4'095'512 µs
|
||||||
|
// 4,070,988 us real
|
||||||
|
// 16,203,990 us user
|
||||||
|
// 7,999 us sys
|
||||||
|
//
|
||||||
|
// footek_test on freebsdarm.test. 502 µs 16'446 µs 7'051'545 µs
|
||||||
|
// 7,012,493 us real
|
||||||
|
// 27,936,725 us user
|
||||||
|
// 7,871 us sys
|
||||||
|
|
||||||
|
// arm fleet
|
||||||
|
// with futexes
|
||||||
|
// 30 threads / 100000 iterations
|
||||||
|
//
|
||||||
|
// footek_test on studio.test. 585 µs 13'597 µs 57'473 µs
|
||||||
|
// 46,481 us real
|
||||||
|
// 68,745 us user
|
||||||
|
// 586,871 us sys
|
||||||
|
//
|
||||||
|
// footek_test on pi5.test. 335 µs 13'034 µs 432'358 µs
|
||||||
|
// 389,619 us real
|
||||||
|
// 839,848 us user
|
||||||
|
// 679,112 us sys
|
||||||
|
//
|
||||||
|
// footek_test on pi.test. 479 µs 16'539 µs 476'395 µs
|
||||||
|
// 463,799 us real
|
||||||
|
// 1,259,267 us user
|
||||||
|
// 547,681 us sys
|
||||||
|
//
|
||||||
|
// footek_test on freebsdarm.test. 364 µs 16'898 µs 1'288'594 µs
|
||||||
|
// 1,256,134 us real
|
||||||
|
// 3,770,473 us user
|
||||||
|
// 1,214,755 us sys
|
||||||
|
|
||||||
|
// arm fleet
|
||||||
|
// with posix
|
||||||
|
// 30 threads / 100000 iterations
|
||||||
|
//
|
||||||
|
// footek_test on Apple M2 Ultra (Apple Libc)
|
||||||
|
// 45,443 us real
|
||||||
|
// 30,201 us user
|
||||||
|
// 864,650 us sys
|
||||||
|
//
|
||||||
|
// footek_test on Apple M2 Ultra (Cosmo Libc)
|
||||||
|
// 65,118 us real
|
||||||
|
// 77,891 us user
|
||||||
|
// 1,023,575 us sys
|
||||||
|
//
|
||||||
|
// footek_test on pi5.test. 407 µs 12'661 µs 198'133 µs
|
||||||
|
// 152,791 us real
|
||||||
|
// 143,678 us user
|
||||||
|
// 14,736 us sys
|
||||||
|
//
|
||||||
|
// footek_test on studio.test. 463 µs 13'286 µs 234'742 µs
|
||||||
|
// 227,916 us real
|
||||||
|
// 294,162 us user
|
||||||
|
// 155,062 us sys
|
||||||
|
//
|
||||||
|
// footek_test on pi.test. 374 µs 15'720 µs 249'245 µs
|
||||||
|
// 233,504 us real
|
||||||
|
// 301,072 us user
|
||||||
|
// 187,153 us sys
|
||||||
|
//
|
||||||
|
// footek_test on freebsdarm.test. 328 µs 16'614 µs 918'647 µs
|
||||||
|
// 877,124 us real
|
||||||
|
// 1,377,338 us user
|
||||||
|
// 798,230 us sys
|
||||||
|
|
||||||
#define MUTEX_LOCKED(word) ((word) & 8)
|
#define MUTEX_LOCKED(word) ((word) & 8)
|
||||||
#define MUTEX_WAITING(word) ((word) & 16)
|
#define MUTEX_WAITING(word) ((word) & 16)
|
||||||
|
@ -129,7 +255,6 @@ void lock(atomic_int *futex) {
|
||||||
if (atomic_compare_exchange_strong_explicit(
|
if (atomic_compare_exchange_strong_explicit(
|
||||||
futex, &word, 1, memory_order_acquire, memory_order_acquire))
|
futex, &word, 1, memory_order_acquire, memory_order_acquire))
|
||||||
return;
|
return;
|
||||||
pthread_pause_np();
|
|
||||||
}
|
}
|
||||||
if (word == 1)
|
if (word == 1)
|
||||||
word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
|
word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
|
||||||
|
@ -155,11 +280,11 @@ void unlock(atomic_int *futex) {
|
||||||
|
|
||||||
int g_chores;
|
int g_chores;
|
||||||
atomic_int g_lock;
|
atomic_int g_lock;
|
||||||
pthread_mutex_t g_locker;
|
pthread_mutex_t g_locker = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
void *worker(void *arg) {
|
void *worker(void *arg) {
|
||||||
for (int i = 0; i < ITERATIONS; ++i) {
|
for (int i = 0; i < ITERATIONS; ++i) {
|
||||||
#if USE == NSYNC
|
#if USE == POSIX
|
||||||
pthread_mutex_lock(&g_locker);
|
pthread_mutex_lock(&g_locker);
|
||||||
++g_chores;
|
++g_chores;
|
||||||
pthread_mutex_unlock(&g_locker);
|
pthread_mutex_unlock(&g_locker);
|
||||||
|
@ -172,6 +297,20 @@ void *worker(void *arg) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Computes the difference a - b of two timevals.
//
// When the microsecond field of `a` is smaller than that of `b`, one
// second is borrowed so the resulting microsecond field stays
// non-negative for normalized inputs.
struct timeval tub(struct timeval a, struct timeval b) {
  int borrow = a.tv_usec < b.tv_usec;
  if (borrow)
    a.tv_usec += 1000000;  // borrow one second's worth of microseconds
  a.tv_usec -= b.tv_usec;
  a.tv_sec = a.tv_sec - b.tv_sec - borrow;
  return a;
}
|
||||||
|
|
||||||
|
// Converts a timeval to a single count of microseconds.
long tomicros(struct timeval x) {
  const unsigned long micros_per_second = 1000000ul;
  return x.tv_sec * micros_per_second + x.tv_usec;
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
struct timeval start;
|
struct timeval start;
|
||||||
gettimeofday(&start, 0);
|
gettimeofday(&start, 0);
|
||||||
|
@ -181,68 +320,20 @@ int main() {
|
||||||
pthread_create(&th[i], 0, worker, 0);
|
pthread_create(&th[i], 0, worker, 0);
|
||||||
for (int i = 0; i < THREADS; ++i)
|
for (int i = 0; i < THREADS; ++i)
|
||||||
pthread_join(th[i], 0);
|
pthread_join(th[i], 0);
|
||||||
npassert(g_chores == THREADS * ITERATIONS);
|
assert(g_chores == THREADS * ITERATIONS);
|
||||||
|
|
||||||
struct rusage ru;
|
struct rusage ru;
|
||||||
struct timeval end;
|
struct timeval end;
|
||||||
gettimeofday(&end, 0);
|
gettimeofday(&end, 0);
|
||||||
getrusage(RUSAGE_SELF, &ru);
|
getrusage(RUSAGE_SELF, &ru);
|
||||||
printf("%,16ld us real\n"
|
printf("%16ld us real\n"
|
||||||
"%,16ld us user\n"
|
"%16ld us user\n"
|
||||||
"%,16ld us sys\n",
|
"%16ld us sys\n",
|
||||||
timeval_tomicros(timeval_sub(end, start)), //
|
tomicros(tub(end, start)), //
|
||||||
timeval_tomicros(ru.ru_utime), //
|
tomicros(ru.ru_utime), //
|
||||||
timeval_tomicros(ru.ru_stime));
|
tomicros(ru.ru_stime));
|
||||||
|
|
||||||
|
#ifdef __COSMOPOLITAN__
|
||||||
CheckForMemoryLeaks();
|
CheckForMemoryLeaks();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// x86 fleet
|
|
||||||
// with pthread_mutex_t
|
|
||||||
// 30 threads / 100000 iterations
|
|
||||||
//
|
|
||||||
// 177,702 us real
|
|
||||||
// 183,488 us user
|
|
||||||
// 54,921 us sys
|
|
||||||
// footek_test on rhel7.test. 304 µs 2'225 µs 185'809 µs
|
|
||||||
// 191,346 us real
|
|
||||||
// 43,746 us user
|
|
||||||
// 257,012 us sys
|
|
||||||
// footek_test on freebsd.test. 405 µs 2'186 µs 200'568 µs
|
|
||||||
// 194,344 us real
|
|
||||||
// 228,235 us user
|
|
||||||
// 143,203 us sys
|
|
||||||
// footek_test on xnu.test. 33'207 µs 5'164 µs 220'693 µs
|
|
||||||
// 199,882 us real
|
|
||||||
// 138,178 us user
|
|
||||||
// 329,501 us sys
|
|
||||||
// footek_test on netbsd.test. 350 µs 3'570 µs 262'186 µs
|
|
||||||
// 291,255 us real
|
|
||||||
// 70,000 us user
|
|
||||||
// 440,000 us sys
|
|
||||||
// footek_test on openbsd.test. 628 µs 3'232 µs 342'136 µs
|
|
||||||
// 250,072 us real
|
|
||||||
// 437,500 us user
|
|
||||||
// 93,750 us sys
|
|
||||||
// footek_test on win10.test. 996 µs 10'949 µs 398'435 µs
|
|
||||||
|
|
||||||
// arm fleet
|
|
||||||
// with pthread_mutex_t
|
|
||||||
// 30 threads / 100000 iterations
|
|
||||||
//
|
|
||||||
// 88,681 us real
|
|
||||||
// 163,500 us user
|
|
||||||
// 22,183 us sys
|
|
||||||
// footek_test on studio.test. 651 µs 15'086 µs 98'632 µs
|
|
||||||
// 157,701 us real
|
|
||||||
// 215,597 us user
|
|
||||||
// 46,436 us sys
|
|
||||||
// footek_test on pi5.test. 296 µs 13'222 µs 159'805 µs
|
|
||||||
// 699,863 us real
|
|
||||||
// 1,027,981 us user
|
|
||||||
// 648,353 us sys
|
|
||||||
// footek_test on pi.test. 419 µs 16'716 µs 721'851 µs
|
|
||||||
// 843,858 us real
|
|
||||||
// 1,432,362 us user
|
|
||||||
// 696,613 us sys
|
|
||||||
// footek_test on freebsdarm.test. 349 µs 16'613 µs 876'863 µs
|
|
||||||
|
|
2
third_party/nsync/mu.c
vendored
2
third_party/nsync/mu.c
vendored
|
@ -23,6 +23,7 @@
|
||||||
#include "third_party/nsync/mu_semaphore.h"
|
#include "third_party/nsync/mu_semaphore.h"
|
||||||
#include "third_party/nsync/races.internal.h"
|
#include "third_party/nsync/races.internal.h"
|
||||||
#include "libc/thread/thread.h"
|
#include "libc/thread/thread.h"
|
||||||
|
#include "libc/intrin/strace.h"
|
||||||
#include "third_party/nsync/wait_s.internal.h"
|
#include "third_party/nsync/wait_s.internal.h"
|
||||||
__static_yoink("nsync_notice");
|
__static_yoink("nsync_notice");
|
||||||
|
|
||||||
|
@ -152,6 +153,7 @@ void nsync_mu_lock (nsync_mu *mu) {
|
||||||
if ((old_word&MU_WZERO_TO_ACQUIRE) != 0 ||
|
if ((old_word&MU_WZERO_TO_ACQUIRE) != 0 ||
|
||||||
!ATM_CAS_ACQ (&mu->word, old_word,
|
!ATM_CAS_ACQ (&mu->word, old_word,
|
||||||
(old_word+MU_WADD_TO_ACQUIRE) & ~MU_WCLEAR_ON_ACQUIRE)) {
|
(old_word+MU_WADD_TO_ACQUIRE) & ~MU_WCLEAR_ON_ACQUIRE)) {
|
||||||
|
LOCKTRACE("acquiring nsync_mu_lock(%t)...", mu);
|
||||||
waiter *w = nsync_waiter_new_ ();
|
waiter *w = nsync_waiter_new_ ();
|
||||||
nsync_mu_lock_slow_ (mu, w, 0, nsync_writer_type_);
|
nsync_mu_lock_slow_ (mu, w, 0, nsync_writer_type_);
|
||||||
nsync_waiter_free_ (w);
|
nsync_waiter_free_ (w);
|
||||||
|
|
Loading…
Reference in a new issue