From 388e236360ec1e6692adf8ec3960e4ad6f91379d Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Sat, 22 Jun 2024 09:55:02 -0700
Subject: [PATCH] Revert misguided dlmalloc optimization

---
 third_party/dlmalloc/init.inc     | 12 +++++++---
 third_party/dlmalloc/mspaces.inc  |  7 ------
 third_party/dlmalloc/threaded.inc | 39 ++++++++++++-------------------
 3 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/third_party/dlmalloc/init.inc b/third_party/dlmalloc/init.inc
index e0d303616..970bad026 100644
--- a/third_party/dlmalloc/init.inc
+++ b/third_party/dlmalloc/init.inc
@@ -7,18 +7,24 @@
 #if ONLY_MSPACES

 static void dlmalloc_pre_fork(void) {
+  mstate h;
   for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i)
-    ACQUIRE_LOCK(&g_heaps[i].m.mutex);
+    if ((h = atomic_load_explicit(&g_heaps[i], memory_order_acquire)))
+      ACQUIRE_LOCK(&h->mutex);
 }

 static void dlmalloc_post_fork_parent(void) {
+  mstate h;
   for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i)
-    RELEASE_LOCK(&g_heaps[i].m.mutex);
+    if ((h = atomic_load_explicit(&g_heaps[i], memory_order_acquire)))
+      RELEASE_LOCK(&h->mutex);
 }

 static void dlmalloc_post_fork_child(void) {
+  mstate h;
   for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i)
-    (void)INITIAL_LOCK(&g_heaps[i].m.mutex);
+    if ((h = atomic_load_explicit(&g_heaps[i], memory_order_acquire)))
+      (void)INITIAL_LOCK(&h->mutex);
 }

 #else
diff --git a/third_party/dlmalloc/mspaces.inc b/third_party/dlmalloc/mspaces.inc
index 86e26609f..1f048d0eb 100644
--- a/third_party/dlmalloc/mspaces.inc
+++ b/third_party/dlmalloc/mspaces.inc
@@ -23,13 +23,6 @@ static mstate init_user_mstate(char* tbase, size_t tsize) {
   return m;
 }

-// [jart] rather than calling mmap() 96 times from _start() just use .bss
-static void init_heap(union Heap *heap, int locked) {
-  mstate m = init_user_mstate(heap->mspace, sizeof(*heap));
-  m->seg.sflags = USE_MMAP_BIT;
-  set_lock(m, locked);
-}
-
 mspace create_mspace(size_t capacity, int locked) {
   mstate m = 0;
   size_t msize;
diff --git a/third_party/dlmalloc/threaded.inc b/third_party/dlmalloc/threaded.inc
index 999506ac3..904201603 100644
--- a/third_party/dlmalloc/threaded.inc
+++ b/third_party/dlmalloc/threaded.inc
@@ -31,20 +31,9 @@
 #error "threaded dlmalloc needs footers and mspaces"
 #endif
-union Heap {
-  struct malloc_state mstate;
-  struct {
-    size_t top_foot[2];
-    struct malloc_state m;
-  };
-  _Alignas(16) char mspace[DEFAULT_GRANULARITY];
-};
-
-static void init_heap(union Heap *heap, int locked);
-
 static struct magicu magiu;
 static unsigned g_heapslen;
-static union Heap g_heaps[128];
+static mstate g_heaps[128];

 void dlfree(void *p) {
   return mspace_free(0, p);
 }
@@ -65,7 +54,7 @@ int dlmallopt(int param_number, int value) {
 int dlmalloc_trim(size_t pad) {
   int got_some = 0;
   for (unsigned i = 0; i < g_heapslen; ++i)
-    got_some |= mspace_trim(&g_heaps[i].m, pad);
+    got_some |= mspace_trim(g_heaps[i], pad);
   return got_some;
 }

@@ -79,7 +68,7 @@ void dlmalloc_inspect_all(void handler(void *start, void *end,
                                        size_t used_bytes, void *callback_arg),
                           void *arg) {
   for (unsigned i = 0; i < g_heapslen; ++i)
-    mspace_inspect_all(&g_heaps[i].m, handler, arg);
+    mspace_inspect_all(g_heaps[i], handler, arg);
 }

 forceinline mstate get_arena(void) {
@@ -93,11 +82,11 @@ forceinline mstate get_arena(void) {
   asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0));
   cpu = tpidr_el0 & 255;
 #endif
-  return &g_heaps[__magicu_div(cpu, magiu) % g_heapslen].m;
+  return g_heaps[__magicu_div(cpu, magiu) % g_heapslen];
 }

 static void *dlmalloc_single(size_t n) {
-  return mspace_malloc(&g_heaps[0].m, n);
+  return mspace_malloc(g_heaps[0], n);
 }

 static void *dlmalloc_threaded(size_t n) {
@@ -105,7 +94,7 @@
 }

 static void *dlcalloc_single(size_t n, size_t z) {
-  return mspace_calloc(&g_heaps[0].m, n, z);
+  return mspace_calloc(g_heaps[0], n, z);
 }

 static void *dlcalloc_threaded(size_t n, size_t z) {
@@ -113,7 +102,7 @@
 }

 static void *dlrealloc_single(void *p, size_t n) {
-  return mspace_realloc(&g_heaps[0].m, p, n);
+  return mspace_realloc(g_heaps[0], p, n);
 }

 static void *dlrealloc_threaded(void *p, size_t n) {
@@ -124,7 +113,7 @@
 }

 static void *dlmemalign_single(size_t a, size_t n) {
-  return mspace_memalign(&g_heaps[0].m, a, n);
+  return mspace_memalign(g_heaps[0], a, n);
 }

 static void *dlmemalign_threaded(size_t a, size_t n) {
@@ -132,7 +121,7 @@
 }

 static struct mallinfo dlmallinfo_single(void) {
-  return mspace_mallinfo(&g_heaps[0].m);
+  return mspace_mallinfo(g_heaps[0]);
 }

 static struct mallinfo dlmallinfo_threaded(void) {
@@ -155,7 +144,8 @@ static void use_single_heap(bool uses_locks) {
   dlrealloc = dlrealloc_single;
   dlmemalign = dlmemalign_single;
   dlmallinfo = dlmallinfo_single;
-  init_heap(&g_heaps[0], uses_locks);
+  if (!(g_heaps[0] = create_mspace(0, uses_locks)))
+    __builtin_trap();
 }

 static void threaded_dlmalloc(void) {
@@ -190,9 +180,10 @@ static void threaded_dlmalloc(void) {
   // we need this too due to linux's cpu count affinity hack
   g_heapslen = heaps;

-  // create the heaps
+  // create the arenas
   for (size_t i = 0; i < g_heapslen; ++i)
-    init_heap(&g_heaps[i], true);
+    if (!(g_heaps[i] = create_mspace(0, true)))
+      __builtin_trap();

   // install function pointers
   dlmalloc = dlmalloc_threaded;
@@ -201,5 +192,5 @@
   dlmemalign = dlmemalign_threaded;
   dlmallinfo = dlmallinfo_threaded;

-  STRACE("created %d dlmalloc heaps for %d cpus", heaps, cpus);
+  STRACE("created %d dlmalloc arenas for %d cpus", heaps, cpus);
 }
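
Note: with init_heap() gone, the g_heaps[] slots are once again populated lazily
through create_mspace(), so a slot can still be NULL when fork() runs; that is
why the hooks in init.inc above load each slot with an acquire and skip empty
ones before touching its mutex. Below is a minimal standalone sketch of that
fork-locking pattern, with hypothetical names and a pthread mutex standing in
for dlmalloc's ACQUIRE_LOCK(); it is an illustration, not code from the commit.

    /*
     * Sketch of the lazy-arena pre-fork locking pattern, assuming each
     * slot is published with a release store by the creation path.
     */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    #define NARENAS 128

    struct arena {
      pthread_mutex_t mutex;
      /* ... allocator state ... */
    };

    static _Atomic(struct arena *) g_arenas[NARENAS];

    static void arenas_pre_fork(void) {
      /* Lock only the arenas that exist; slots not yet created are
         NULL and must be skipped, as dlmalloc_pre_fork() does. */
      for (size_t i = 0; i < NARENAS; ++i) {
        struct arena *a =
            atomic_load_explicit(&g_arenas[i], memory_order_acquire);
        if (a)
          pthread_mutex_lock(&a->mutex);
      }
    }

The acquire load pairs with the assumed release store on the creation side, so
any slot observed non-NULL refers to a fully initialized arena.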