Fix nsync_mu_unlock_slow_() on Apple Silicon

We torture test dlmalloc() in test/libc/stdio/memory_test.c. That test
would occasionally crash on Apple M1 microprocessors when dlmalloc was
using *NSYNC locks. The cause was relatively easy to spot: one
particular compare-and-swap operation needed sequentially-consistent
ordering rather than an acquire barrier.
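
A minimal sketch of the distinction at play, written with plain C11
atomics rather than *NSYNC's wrappers (the helper names below are
illustrative, not part of this change):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Acquire-only CAS: on success, later memory operations cannot move
       before it, but it does not join the single total order shared by
       all seq_cst operations, a gap that weakly ordered CPUs such as
       Apple's M1 (ARM64) can expose. */
    static int cas_acquire(atomic_uint *p, uint32_t o, uint32_t n) {
      return atomic_compare_exchange_strong_explicit(
          p, &o, n, memory_order_acquire, memory_order_relaxed);
    }

    /* Sequentially-consistent CAS: gives the acquire semantics above and
       additionally participates in the global seq_cst order, the stronger
       guarantee the unlock slow path relies on. */
    static int cas_seqcst(atomic_uint *p, uint32_t o, uint32_t n) {
      return atomic_compare_exchange_strong_explicit(
          p, &o, n, memory_order_seq_cst, memory_order_relaxed);
    }
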
commit 751d20d98d (parent 3b15d31247)
Author: Justine Tunney
Date:   2023-11-13 10:57:02 -08:00

5 changed files with 36 additions and 2 deletions

@@ -23,6 +23,7 @@
 #include "libc/thread/thread.h"
 #include "libc/thread/tls.h"
 #include "third_party/dlmalloc/vespene.internal.h"
+#include "third_party/nsync/mu.h"
 // clang-format off
 #define FOOTERS 0

@@ -30,6 +30,8 @@
  */
+#ifdef USE_SPIN_LOCKS
 #define MLOCK_T atomic_uint
 static int malloc_wipe(MLOCK_T *lk) {
@@ -51,6 +53,27 @@ static int malloc_unlock(MLOCK_T *lk) {
   return 0;
 }
+#else
+#define MLOCK_T nsync_mu
+static int malloc_wipe(MLOCK_T *lk) {
+  bzero(lk, sizeof(*lk));
+  return 0;
+}
+static int malloc_lock(MLOCK_T *lk) {
+  nsync_mu_lock(lk);
+  return 0;
+}
+static int malloc_unlock(MLOCK_T *lk) {
+  nsync_mu_unlock(lk);
+  return 0;
+}
+#endif
 #define ACQUIRE_LOCK(lk) malloc_lock(lk)
 #define RELEASE_LOCK(lk) malloc_unlock(lk)
 #define INITIAL_LOCK(lk) malloc_wipe(lk)

@@ -15,6 +15,8 @@ ORIGIN
 LOCAL CHANGES
+- Fix nsync_mu_unlock() on Apple Silicon
 - Time APIs were so good that they're now in libc
 - Double linked list API was so good that it's now in libc

@@ -86,6 +86,13 @@ static inline int atm_cas_relacq_u32_(nsync_atomic_uint32_ *p, uint32_t o,
                                                  memory_order_relaxed);
 }
+static inline int atm_cas_seqcst_u32_(nsync_atomic_uint32_ *p, uint32_t o,
+                                      uint32_t n) {
+  return atomic_compare_exchange_strong_explicit(NSYNC_ATOMIC_UINT32_PTR_(p),
+                                                 &o, n, memory_order_seq_cst,
+                                                 memory_order_relaxed);
+}
 #define ATM_CAS_HELPER_(barrier, p, o, n) \
   (atm_cas_##barrier##_u32_((p), (o), (n)))
@@ -93,6 +100,7 @@ static inline int atm_cas_relacq_u32_(nsync_atomic_uint32_ *p, uint32_t o,
 #define ATM_CAS_ACQ(p, o, n) ATM_CAS_HELPER_(acq, (p), (o), (n))
 #define ATM_CAS_REL(p, o, n) ATM_CAS_HELPER_(rel, (p), (o), (n))
 #define ATM_CAS_RELACQ(p, o, n) ATM_CAS_HELPER_(relacq, (p), (o), (n))
+#define ATM_CAS_SEQCST(p, o, n) ATM_CAS_HELPER_(seqcst, (p), (o), (n))
 /* Need a cast to remove "const" from some uses. */
 #define ATM_LOAD(p) \

@@ -298,8 +298,8 @@ void nsync_mu_unlock_slow_ (nsync_mu *mu, lock_type *l_type) {
                 return;
             }
         } else if ((old_word&MU_SPINLOCK) == 0 &&
-                   ATM_CAS_ACQ (&mu->word, old_word,
+                   ATM_CAS_SEQCST (&mu->word, old_word, /* [jart] fixes issues on apple silicon */
                        (old_word-early_release_mu)|MU_SPINLOCK|MU_DESIG_WAKER)) {
             struct Dll *wake;
             lock_type *wake_type;
             uint32_t clear_on_release;