From 884d89235f12f25c0f827ae0b974320ffa35045c Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Mon, 26 Aug 2024 19:59:25 -0700 Subject: [PATCH] Harden against aba problem --- build/run | 2 +- examples/aba.c | 125 +++++++++++++++++++++++++++ libc/intrin/atomic.c | 24 ----- libc/intrin/maps.h | 2 +- libc/intrin/mmap.c | 31 ++++--- net/turfwar/turfwar.c | 42 ++++----- third_party/nsync/common.c | 65 ++++++++------ third_party/nsync/mu_semaphore_sem.c | 6 +- 8 files changed, 212 insertions(+), 85 deletions(-) create mode 100644 examples/aba.c delete mode 100644 libc/intrin/atomic.c diff --git a/build/run b/build/run index c7fc0c292..079bc9991 100755 --- a/build/run +++ b/build/run @@ -4,5 +4,5 @@ UNAMES=$(uname -s) if [ x"$UNAMES" = x"Darwin" ] && [ x"$UNAMEM" = x"arm64" ]; then exec ape "$@" else - exec "$@" + exec rusage "$@" fi diff --git a/examples/aba.c b/examples/aba.c new file mode 100644 index 000000000..b38b26028 --- /dev/null +++ b/examples/aba.c @@ -0,0 +1,125 @@ +#if 0 +/*─────────────────────────────────────────────────────────────────╗ +│ To the extent possible under law, Justine Tunney has waived │ +│ all copyright and related or neighboring rights to this file, │ +│ as it is written in the following disclaimers: │ +│ • http://unlicense.org/ │ +│ • http://creativecommons.org/publicdomain/zero/1.0/ │ +╚─────────────────────────────────────────────────────────────────*/ +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +// lockless push / pop tutorial +// +// this file demonstrates how to create a singly linked list that can be +// pushed and popped across multiple threads, using only atomics. atomic +// operations (rather than using a mutex) make push/pop go faster and it +// ensures asynchronous signal safety too. therefore it will be safe for +// use in a variety of contexts, such as signal handlers. + +#define THREADS 128 +#define ITERATIONS 10000 + +// adjust mask based on alignment of list struct +// +// - 0x00fffffffffffff0 may be used if List* is always 16-byte aligned. +// We know that's the case here, because we call malloc() to create +// every List* object, and malloc() results are always max aligned. +// +// - 0x00fffffffffffff8 may be used if List* is always 8-byte aligned. +// This might be the case if you're pushing and popping stuff that was +// allocated from an array, to avoid malloc() calls. This has one +// fewer byte of safeguards against the ABA problem though. +// +// - 0x00fffffffffff000 may be used if List* is always page aligned. +// This is a good choice if you use mmap() to allocate each List* +// element, since it offers maximum protection against ABA. +// +// - only the highest byte of a 64-bit pointer is safe to use on our +// supported platforms. on most x86 and arm systems, it's possible to +// use the top sixteen bits. however that's not the case on more +// recent high end x86-64 systems that have pml5t. +// +#define MASQUE 0x00fffffffffffff0 + +#define PTR(x) ((uintptr_t)(x) & MASQUE) +#define TAG(x) ROL((uintptr_t)(x) & ~MASQUE, 8) +#define ABA(p, t) ((uintptr_t)(p) | (ROR((uintptr_t)(t), 8) & ~MASQUE)) +#define ROL(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define ROR(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) + +struct List { + struct List* next; + int count; +}; + +atomic_uintptr_t list; + +void push(struct List* elem) { + uintptr_t tip; + assert(!TAG(elem)); + for (tip = atomic_load_explicit(&list, memory_order_relaxed);;) { + elem->next = (struct List*)PTR(tip); + if (atomic_compare_exchange_weak_explicit( + &list, &tip, ABA(elem, TAG(tip) + 1), memory_order_release, + memory_order_relaxed)) + break; + pthread_pause_np(); + } +} + +struct List* pop(void) { + uintptr_t tip; + struct List* elem; + tip = atomic_load_explicit(&list, memory_order_relaxed); + while ((elem = (struct List*)PTR(tip))) { + if (atomic_compare_exchange_weak_explicit( + &list, &tip, ABA(elem->next, TAG(tip) + 1), memory_order_acquire, + memory_order_relaxed)) + break; + pthread_pause_np(); + } + return elem; +} + +void* tester(void* arg) { + struct List* elem; + for (int i = 0; i < ITERATIONS; ++i) { + while (!(elem = pop())) { + elem = malloc(sizeof(*elem)); + elem->count = 0; + push(elem); + } + elem->count++; + push(elem); + } + return 0; +} + +int main() { + printf("testing aba problem..."); + fflush(stdout); + pthread_t th[THREADS]; + for (int i = 0; i < THREADS; ++i) + pthread_create(&th[i], 0, tester, 0); + for (int i = 0; i < THREADS; ++i) + pthread_join(th[i], 0); + int sum = 0; + struct List* elem; + while ((elem = pop())) { + printf(" %d", elem->count); + sum += elem->count; + free(elem); + } + printf("\n"); + assert(sum == ITERATIONS * THREADS); + printf("you are the dancing queen\n"); + CheckForMemoryLeaks(); +} diff --git a/libc/intrin/atomic.c b/libc/intrin/atomic.c deleted file mode 100644 index f46f74f49..000000000 --- a/libc/intrin/atomic.c +++ /dev/null @@ -1,24 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2024 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/atomic.h" - -bool dog(_Atomic(long) *p, long *e, long w) { - return atomic_compare_exchange_weak_explicit(p, e, w, memory_order_acq_rel, - memory_order_relaxed); -} diff --git a/libc/intrin/maps.h b/libc/intrin/maps.h index 5fc9b721b..c0b0a911d 100644 --- a/libc/intrin/maps.h +++ b/libc/intrin/maps.h @@ -29,7 +29,7 @@ struct Map { struct Maps { struct Tree *maps; _Atomic(uint64_t) lock; - _Atomic(struct Map *) freed; + _Atomic(uintptr_t) freed; size_t count; size_t pages; _Atomic(char *) pick; diff --git a/libc/intrin/mmap.c b/libc/intrin/mmap.c index f10287369..8be86f963 100644 --- a/libc/intrin/mmap.c +++ b/libc/intrin/mmap.c @@ -50,6 +50,13 @@ #define PGUP(x) (((x) + pagesz - 1) & -pagesz) +#define MASQUE 0x00fffffffffffff8 +#define PTR(x) ((uintptr_t)(x) & MASQUE) +#define TAG(x) ROL((uintptr_t)(x) & ~MASQUE, 8) +#define ABA(p, t) ((uintptr_t)(p) | (ROR((uintptr_t)(t), 8) & ~MASQUE)) +#define ROL(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define ROR(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) + #if !MMDEBUG #define ASSERT(x) (void)0 #else @@ -227,14 +234,17 @@ StartOver: } void __maps_free(struct Map *map) { + uintptr_t tip; + ASSERT(!TAG(map)); map->size = 0; map->addr = MAP_FAILED; - map->freed = atomic_load_explicit(&__maps.freed, memory_order_relaxed); - for (;;) { - if (atomic_compare_exchange_weak_explicit(&__maps.freed, &map->freed, map, - memory_order_release, - memory_order_relaxed)) + for (tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed);;) { + map->freed = (struct Map *)PTR(tip); + if (atomic_compare_exchange_weak_explicit( + &__maps.freed, &tip, ABA(map, TAG(tip) + 1), memory_order_release, + memory_order_relaxed)) break; + pthread_pause_np(); } } @@ -297,12 +307,13 @@ void __maps_insert(struct Map *map) { struct Map *__maps_alloc(void) { struct Map *map; - map = atomic_load_explicit(&__maps.freed, memory_order_relaxed); - while (map) { - if (atomic_compare_exchange_weak_explicit(&__maps.freed, &map, map->freed, - memory_order_acquire, - memory_order_relaxed)) + uintptr_t tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed); + while ((map = (struct Map *)PTR(tip))) { + if (atomic_compare_exchange_weak_explicit( + &__maps.freed, &tip, ABA(map->freed, TAG(tip) + 1), + memory_order_acquire, memory_order_relaxed)) return map; + pthread_pause_np(); } int gransz = __gransize; struct DirectMap sys = sys_mmap(0, gransz, PROT_READ | PROT_WRITE, diff --git a/net/turfwar/turfwar.c b/net/turfwar/turfwar.c index 6ccf9578f..f2cf5ee3f 100644 --- a/net/turfwar/turfwar.c +++ b/net/turfwar/turfwar.c @@ -378,25 +378,25 @@ struct timespec WaitFor(int millis) { bool CheckMem(const char *file, int line, void *ptr) { if (ptr) return true; - kprintf("%s:%d: %P: out of memory: %s\n", file, line, strerror(errno)); + kprintf("%s:%d: %H: out of memory: %s\n", file, line, strerror(errno)); return false; } bool CheckSys(const char *file, int line, long rc) { if (rc != -1) return true; - kprintf("%s:%d: %P: %s\n", file, line, strerror(errno)); + kprintf("%s:%d: %H: %s\n", file, line, strerror(errno)); return false; } bool CheckSql(const char *file, int line, int rc) { if (rc == SQLITE_OK) return true; - kprintf("%s:%d: %P: %s\n", file, line, sqlite3_errstr(rc)); + kprintf("%s:%d: %H: %s\n", file, line, sqlite3_errstr(rc)); return false; } bool CheckDb(const char *file, int line, int rc, sqlite3 *db) { if (rc == SQLITE_OK) return true; - kprintf("%s:%d: %P: %s: %s\n", file, line, sqlite3_errstr(rc), + kprintf("%s:%d: %H: %s: %s\n", file, line, sqlite3_errstr(rc), sqlite3_errmsg(db)); return false; } @@ -1645,7 +1645,7 @@ OnError: void *ScoreWorker(void *arg) { pthread_setname_np(pthread_self(), "ScoreAll"); for (;;) { - LOG("%P regenerating score...\n"); + LOG("%H regenerating score...\n"); Update(&g_asset.score, GenerateScore, -1, MS2CASH(SCORE_UPDATE_MS)); usleep(SCORE_UPDATE_MS * 1000); } @@ -1655,9 +1655,9 @@ void *ScoreWorker(void *arg) { void *ScoreHourWorker(void *arg) { pthread_setname_np(pthread_self(), "ScoreHour"); for (;;) { - LOG("%P regenerating hour score...\n"); + LOG("%H regenerating hour score...\n"); Update(&g_asset.score_hour, GenerateScore, 60L * 60, - MS2CASH(SCORE_UPDATE_MS)); + MS2CASH(SCORE_H_UPDATE_MS)); usleep(SCORE_H_UPDATE_MS * 1000); } } @@ -1666,7 +1666,7 @@ void *ScoreHourWorker(void *arg) { void *ScoreDayWorker(void *arg) { pthread_setname_np(pthread_self(), "ScoreDay"); for (;;) { - LOG("%P regenerating day score...\n"); + LOG("%H regenerating day score...\n"); Update(&g_asset.score_day, GenerateScore, 60L * 60 * 24, MS2CASH(SCORE_D_UPDATE_MS)); usleep(SCORE_D_UPDATE_MS * 1000); @@ -1677,7 +1677,7 @@ void *ScoreDayWorker(void *arg) { void *ScoreWeekWorker(void *arg) { pthread_setname_np(pthread_self(), "ScoreWeek"); for (;;) { - LOG("%P regenerating week score...\n"); + LOG("%H regenerating week score...\n"); Update(&g_asset.score_week, GenerateScore, 60L * 60 * 24 * 7, MS2CASH(SCORE_W_UPDATE_MS)); usleep(SCORE_W_UPDATE_MS * 1000); @@ -1688,7 +1688,7 @@ void *ScoreWeekWorker(void *arg) { void *ScoreMonthWorker(void *arg) { pthread_setname_np(pthread_self(), "ScoreMonth"); for (;;) { - LOG("%P regenerating month score...\n"); + LOG("%H regenerating month score...\n"); Update(&g_asset.score_month, GenerateScore, 60L * 60 * 24 * 30, MS2CASH(SCORE_M_UPDATE_MS)); usleep(SCORE_M_UPDATE_MS * 1000); @@ -1874,7 +1874,7 @@ void Meltdown(void) { int i, marks; struct timespec now; ++g_meltdowns; - LOG("%P panicking because %d out of %d workers is connected\n", g_connections, + LOG("%H panicking because %d out of %d workers is connected\n", g_connections, g_workers); now = timespec_real(); for (marks = i = 0; i < g_workers; ++i) { @@ -1924,9 +1924,9 @@ void CheckDatabase(void) { sqlite3 *db; if (g_integrity) { CHECK_SQL(DbOpen("db.sqlite3", &db)); - LOG("%P Checking database integrity...\n"); + LOG("%H Checking database integrity...\n"); CHECK_SQL(sqlite3_exec(db, "PRAGMA integrity_check", 0, 0, 0)); - LOG("%P Vacuuming database...\n"); + LOG("%H Vacuuming database...\n"); CHECK_SQL(sqlite3_exec(db, "VACUUM", 0, 0, 0)); CHECK_SQL(sqlite3_close(db)); } @@ -2204,11 +2204,11 @@ int main(int argc, char *argv[]) { pthread_attr_destroy(&attr); // time to serve - LOG("%P ready\n"); + LOG("%H ready\n"); Supervisor(0); // cancel listen() - LOG("%P interrupting services...\n"); + LOG("%H interrupting services...\n"); pthread_cancel(scorer); pthread_cancel(recenter); pthread_cancel(g_listener); @@ -2218,7 +2218,7 @@ int main(int argc, char *argv[]) { pthread_cancel(scorer_month); pthread_cancel(replenisher); - LOG("%P joining services...\n"); + LOG("%H joining services...\n"); unassert(!pthread_join(scorer, 0)); unassert(!pthread_join(recenter, 0)); unassert(!pthread_join(g_listener, 0)); @@ -2229,13 +2229,13 @@ int main(int argc, char *argv[]) { unassert(!pthread_join(replenisher, 0)); // cancel read() so that keepalive clients finish faster - LOG("%P interrupting workers...\n"); + LOG("%H interrupting workers...\n"); for (int i = 0; i < g_workers; ++i) if (!g_worker[i].dead) pthread_cancel(g_worker[i].th); // wait for producers to finish - LOG("%P joining workers...\n"); + LOG("%H joining workers...\n"); for (int i = 0; i < g_workers; ++i) unassert(!pthread_join(g_worker[i].th, 0)); @@ -2244,14 +2244,14 @@ int main(int argc, char *argv[]) { // claims worker thread which waits forever for new claims. unassert(!g_claims.count); pthread_cancel(claimer); - LOG("%P waiting for claims worker...\n"); + LOG("%H waiting for claims worker...\n"); unassert(!pthread_join(claimer, 0)); // perform some sanity checks unassert(g_claimsprocessed == g_claimsenqueued); // free memory - LOG("%P freeing memory...\n"); + LOG("%H freeing memory...\n"); FreeAsset(&g_asset.user); FreeAsset(&g_asset.about); FreeAsset(&g_asset.index); @@ -2267,6 +2267,6 @@ int main(int argc, char *argv[]) { time_destroy(); - LOG("%P goodbye\n"); + LOG("%H goodbye\n"); // CheckForMemoryLeaks(); } diff --git a/third_party/nsync/common.c b/third_party/nsync/common.c index d5427a3be..31f7519e4 100644 --- a/third_party/nsync/common.c +++ b/third_party/nsync/common.c @@ -15,6 +15,7 @@ │ See the License for the specific language governing permissions and │ │ limitations under the License. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/atomic.h" #include "libc/calls/calls.h" #include "libc/calls/syscall-sysv.internal.h" #include "libc/dce.h" @@ -136,14 +137,45 @@ waiter *nsync_dll_waiter_samecond_ (struct Dll *e) { /* -------------------------------- */ -static _Atomic(waiter *) free_waiters; +#define MASQUE 0x00fffffffffffff8 +#define PTR(x) ((uintptr_t)(x) & MASQUE) +#define TAG(x) ROL((uintptr_t)(x) & ~MASQUE, 8) +#define ABA(p, t) ((uintptr_t)(p) | (ROR((uintptr_t)(t), 8) & ~MASQUE)) +#define ROL(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define ROR(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) + +static atomic_uintptr_t free_waiters; static void free_waiters_push (waiter *w) { - int backoff = 0; - w->next_free = atomic_load_explicit (&free_waiters, memory_order_relaxed); - while (!atomic_compare_exchange_weak_explicit (&free_waiters, &w->next_free, w, - memory_order_acq_rel, memory_order_relaxed)) - backoff = pthread_delay_np (free_waiters, backoff); + uintptr_t tip; + ASSERT (!TAG(w)); + tip = atomic_load_explicit (&free_waiters, memory_order_relaxed); + for (;;) { + w->next_free = (waiter *) PTR (tip); + if (atomic_compare_exchange_weak_explicit (&free_waiters, + &tip, + ABA (w, TAG (tip) + 1), + memory_order_release, + memory_order_relaxed)) + break; + pthread_pause_np (); + } +} + +static waiter *free_waiters_pop (void) { + waiter *w; + uintptr_t tip; + tip = atomic_load_explicit (&free_waiters, memory_order_relaxed); + while ((w = (waiter *) PTR (tip))) { + if (atomic_compare_exchange_weak_explicit (&free_waiters, + &tip, + ABA (w->next_free, TAG (tip) + 1), + memory_order_acquire, + memory_order_relaxed)) + break; + pthread_pause_np (); + } + return w; } static void free_waiters_populate (void) { @@ -172,30 +204,12 @@ static void free_waiters_populate (void) { } w->nw.sem = &w->sem; dll_init (&w->nw.q); - NSYNC_ATOMIC_UINT32_STORE_ (&w->nw.waiting, 0); w->nw.flags = NSYNC_WAITER_FLAG_MUCV; - ATM_STORE (&w->remove_count, 0); dll_init (&w->same_condition); - w->flags = 0; free_waiters_push (w); } } -static waiter *free_waiters_pop (void) { - waiter *w; - int backoff = 0; - for (;;) { - if ((w = atomic_load_explicit (&free_waiters, memory_order_relaxed))) { - if (atomic_compare_exchange_weak_explicit (&free_waiters, &w, w->next_free, - memory_order_acq_rel, memory_order_relaxed)) - return w; - backoff = pthread_delay_np (free_waiters, backoff); - } else { - free_waiters_populate (); - } - } -} - /* -------------------------------- */ #define waiter_for_thread __get_tls()->tib_nsync @@ -221,7 +235,8 @@ waiter *nsync_waiter_new_ (void) { tw = waiter_for_thread; w = tw; if (w == NULL || (w->flags & (WAITER_RESERVED|WAITER_IN_USE)) != WAITER_RESERVED) { - w = free_waiters_pop (); + while (!(w = free_waiters_pop ())) + free_waiters_populate (); if (tw == NULL) { w->flags |= WAITER_RESERVED; waiter_for_thread = w; diff --git a/third_party/nsync/mu_semaphore_sem.c b/third_party/nsync/mu_semaphore_sem.c index 9b25ae7a6..b821dd08c 100644 --- a/third_party/nsync/mu_semaphore_sem.c +++ b/third_party/nsync/mu_semaphore_sem.c @@ -52,11 +52,11 @@ static nsync_semaphore *sem_big_enough_for_sem = (nsync_semaphore *) (uintptr_t) (sizeof (struct sem) <= sizeof (*sem_big_enough_for_sem))); static void sems_push (struct sem *f) { - int backoff = 0; f->next = atomic_load_explicit (&g_sems, memory_order_relaxed); while (!atomic_compare_exchange_weak_explicit (&g_sems, &f->next, f, - memory_order_acq_rel, memory_order_relaxed)) - backoff = pthread_delay_np (&g_sems, backoff); + memory_order_acq_rel, + memory_order_relaxed)) + pthread_pause_np (); } static bool nsync_mu_semaphore_sem_create (struct sem *f) {