From 751d20d98da3b9fa5b352da48c71c64c713a8904 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Mon, 13 Nov 2023 10:57:02 -0800 Subject: [PATCH] Fix nsync_mu_unlock_slow_() on Apple Silicon We torture test dlmalloc() in test/libc/stdio/memory_test.c. That test was crashing on occasion on Apple M1 microprocessors when dlmalloc was using *NSYNC locks. It was relatively easy to spot the cause, which is this one particular compare and swap operation, which needed to change to use sequentially-consistent ordering rather than an acquire barrier --- third_party/dlmalloc/dlmalloc.c | 1 + third_party/dlmalloc/locks.inc | 23 +++++++++++++++++++++++ third_party/nsync/README.cosmo | 2 ++ third_party/nsync/atomic.internal.h | 8 ++++++++ third_party/nsync/mu.c | 4 ++-- 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/third_party/dlmalloc/dlmalloc.c b/third_party/dlmalloc/dlmalloc.c index 8af7dcbff..7f9f738f8 100644 --- a/third_party/dlmalloc/dlmalloc.c +++ b/third_party/dlmalloc/dlmalloc.c @@ -23,6 +23,7 @@ #include "libc/thread/thread.h" #include "libc/thread/tls.h" #include "third_party/dlmalloc/vespene.internal.h" +#include "third_party/nsync/mu.h" // clang-format off #define FOOTERS 0 diff --git a/third_party/dlmalloc/locks.inc b/third_party/dlmalloc/locks.inc index 801f72346..a53d5fb59 100644 --- a/third_party/dlmalloc/locks.inc +++ b/third_party/dlmalloc/locks.inc @@ -30,6 +30,8 @@ */ +#ifdef USE_SPIN_LOCKS + #define MLOCK_T atomic_uint static int malloc_wipe(MLOCK_T *lk) { @@ -51,6 +53,27 @@ static int malloc_unlock(MLOCK_T *lk) { return 0; } +#else + +#define MLOCK_T nsync_mu + +static int malloc_wipe(MLOCK_T *lk) { + bzero(lk, sizeof(*lk)); + return 0; +} + +static int malloc_lock(MLOCK_T *lk) { + nsync_mu_lock(lk); + return 0; +} + +static int malloc_unlock(MLOCK_T *lk) { + nsync_mu_unlock(lk); + return 0; +} + +#endif + #define ACQUIRE_LOCK(lk) malloc_lock(lk) #define RELEASE_LOCK(lk) malloc_unlock(lk) #define INITIAL_LOCK(lk) malloc_wipe(lk) diff --git a/third_party/nsync/README.cosmo b/third_party/nsync/README.cosmo index 29f2f8ec1..415a659fb 100644 --- a/third_party/nsync/README.cosmo +++ b/third_party/nsync/README.cosmo @@ -15,6 +15,8 @@ ORIGIN LOCAL CHANGES + - Fix nsync_mu_unlock() on Apple Silicon + - Time APIs were so good that they're now in libc - Double linked list API was so good that it's now in libc diff --git a/third_party/nsync/atomic.internal.h b/third_party/nsync/atomic.internal.h index 072c9a033..eae93f146 100644 --- a/third_party/nsync/atomic.internal.h +++ b/third_party/nsync/atomic.internal.h @@ -86,6 +86,13 @@ static inline int atm_cas_relacq_u32_(nsync_atomic_uint32_ *p, uint32_t o, memory_order_relaxed); } +static inline int atm_cas_seqcst_u32_(nsync_atomic_uint32_ *p, uint32_t o, + uint32_t n) { + return atomic_compare_exchange_strong_explicit(NSYNC_ATOMIC_UINT32_PTR_(p), + &o, n, memory_order_seq_cst, + memory_order_relaxed); +} + #define ATM_CAS_HELPER_(barrier, p, o, n) \ (atm_cas_##barrier##_u32_((p), (o), (n))) @@ -93,6 +100,7 @@ static inline int atm_cas_relacq_u32_(nsync_atomic_uint32_ *p, uint32_t o, #define ATM_CAS_ACQ(p, o, n) ATM_CAS_HELPER_(acq, (p), (o), (n)) #define ATM_CAS_REL(p, o, n) ATM_CAS_HELPER_(rel, (p), (o), (n)) #define ATM_CAS_RELACQ(p, o, n) ATM_CAS_HELPER_(relacq, (p), (o), (n)) +#define ATM_CAS_SEQCST(p, o, n) ATM_CAS_HELPER_(seqcst, (p), (o), (n)) /* Need a cast to remove "const" from some uses. */ #define ATM_LOAD(p) \ diff --git a/third_party/nsync/mu.c b/third_party/nsync/mu.c index 562783377..ce74532a0 100644 --- a/third_party/nsync/mu.c +++ b/third_party/nsync/mu.c @@ -298,8 +298,8 @@ void nsync_mu_unlock_slow_ (nsync_mu *mu, lock_type *l_type) { return; } } else if ((old_word&MU_SPINLOCK) == 0 && - ATM_CAS_ACQ (&mu->word, old_word, - (old_word-early_release_mu)|MU_SPINLOCK|MU_DESIG_WAKER)) { + ATM_CAS_SEQCST (&mu->word, old_word, /* [jart] fixes issues on apple silicon */ + (old_word-early_release_mu)|MU_SPINLOCK|MU_DESIG_WAKER)) { struct Dll *wake; lock_type *wake_type; uint32_t clear_on_release;