Revert misguided dlmalloc optimization

This commit is contained in:
Justine Tunney 2024-06-22 09:55:02 -07:00
parent f2c8ddbbe3
commit 388e236360
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
3 changed files with 24 additions and 34 deletions

View file

@@ -7,18 +7,24 @@
#if ONLY_MSPACES #if ONLY_MSPACES
static void dlmalloc_pre_fork(void) { static void dlmalloc_pre_fork(void) {
mstate h;
for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i) for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i)
ACQUIRE_LOCK(&g_heaps[i].m.mutex); if ((h = atomic_load_explicit(&g_heaps[i], memory_order_acquire)))
ACQUIRE_LOCK(&h->mutex);
} }
static void dlmalloc_post_fork_parent(void) { static void dlmalloc_post_fork_parent(void) {
mstate h;
for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i) for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i)
RELEASE_LOCK(&g_heaps[i].m.mutex); if ((h = atomic_load_explicit(&g_heaps[i], memory_order_acquire)))
RELEASE_LOCK(&h->mutex);
} }
static void dlmalloc_post_fork_child(void) { static void dlmalloc_post_fork_child(void) {
mstate h;
for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i) for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i)
(void)INITIAL_LOCK(&g_heaps[i].m.mutex); if ((h = atomic_load_explicit(&g_heaps[i], memory_order_acquire)))
(void)INITIAL_LOCK(&h->mutex);
} }
#else #else

View file

@@ -23,13 +23,6 @@ static mstate init_user_mstate(char* tbase, size_t tsize) {
return m; return m;
} }
// [jart] rather than calling mmap() 96 times from _start() just use .bss
static void init_heap(union Heap *heap, int locked) {
mstate m = init_user_mstate(heap->mspace, sizeof(*heap));
m->seg.sflags = USE_MMAP_BIT;
set_lock(m, locked);
}
mspace create_mspace(size_t capacity, int locked) { mspace create_mspace(size_t capacity, int locked) {
mstate m = 0; mstate m = 0;
size_t msize; size_t msize;

View file

@@ -31,20 +31,9 @@
#error "threaded dlmalloc needs footers and mspaces" #error "threaded dlmalloc needs footers and mspaces"
#endif #endif
union Heap {
struct malloc_state mstate;
struct {
size_t top_foot[2];
struct malloc_state m;
};
_Alignas(16) char mspace[DEFAULT_GRANULARITY];
};
static void init_heap(union Heap *heap, int locked);
static struct magicu magiu; static struct magicu magiu;
static unsigned g_heapslen; static unsigned g_heapslen;
static union Heap g_heaps[128]; static mstate g_heaps[128];
void dlfree(void *p) { void dlfree(void *p) {
return mspace_free(0, p); return mspace_free(0, p);
@@ -65,7 +54,7 @@ int dlmallopt(int param_number, int value) {
int dlmalloc_trim(size_t pad) { int dlmalloc_trim(size_t pad) {
int got_some = 0; int got_some = 0;
for (unsigned i = 0; i < g_heapslen; ++i) for (unsigned i = 0; i < g_heapslen; ++i)
got_some |= mspace_trim(&g_heaps[i].m, pad); got_some |= mspace_trim(g_heaps[i], pad);
return got_some; return got_some;
} }
@@ -79,7 +68,7 @@ void dlmalloc_inspect_all(void handler(void *start, void *end,
size_t used_bytes, void *callback_arg), size_t used_bytes, void *callback_arg),
void *arg) { void *arg) {
for (unsigned i = 0; i < g_heapslen; ++i) for (unsigned i = 0; i < g_heapslen; ++i)
mspace_inspect_all(&g_heaps[i].m, handler, arg); mspace_inspect_all(g_heaps[i], handler, arg);
} }
forceinline mstate get_arena(void) { forceinline mstate get_arena(void) {
@@ -93,11 +82,11 @@ forceinline mstate get_arena(void) {
asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0)); asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0));
cpu = tpidr_el0 & 255; cpu = tpidr_el0 & 255;
#endif #endif
return &g_heaps[__magicu_div(cpu, magiu) % g_heapslen].m; return g_heaps[__magicu_div(cpu, magiu) % g_heapslen];
} }
static void *dlmalloc_single(size_t n) { static void *dlmalloc_single(size_t n) {
return mspace_malloc(&g_heaps[0].m, n); return mspace_malloc(g_heaps[0], n);
} }
static void *dlmalloc_threaded(size_t n) { static void *dlmalloc_threaded(size_t n) {
@@ -105,7 +94,7 @@ static void *dlmalloc_threaded(size_t n) {
} }
static void *dlcalloc_single(size_t n, size_t z) { static void *dlcalloc_single(size_t n, size_t z) {
return mspace_calloc(&g_heaps[0].m, n, z); return mspace_calloc(g_heaps[0], n, z);
} }
static void *dlcalloc_threaded(size_t n, size_t z) { static void *dlcalloc_threaded(size_t n, size_t z) {
@@ -113,7 +102,7 @@ static void *dlcalloc_threaded(size_t n, size_t z) {
} }
static void *dlrealloc_single(void *p, size_t n) { static void *dlrealloc_single(void *p, size_t n) {
return mspace_realloc(&g_heaps[0].m, p, n); return mspace_realloc(g_heaps[0], p, n);
} }
static void *dlrealloc_threaded(void *p, size_t n) { static void *dlrealloc_threaded(void *p, size_t n) {
@@ -124,7 +113,7 @@ static void *dlrealloc_threaded(void *p, size_t n) {
} }
static void *dlmemalign_single(size_t a, size_t n) { static void *dlmemalign_single(size_t a, size_t n) {
return mspace_memalign(&g_heaps[0].m, a, n); return mspace_memalign(g_heaps[0], a, n);
} }
static void *dlmemalign_threaded(size_t a, size_t n) { static void *dlmemalign_threaded(size_t a, size_t n) {
@@ -132,7 +121,7 @@ static void *dlmemalign_threaded(size_t a, size_t n) {
} }
static struct mallinfo dlmallinfo_single(void) { static struct mallinfo dlmallinfo_single(void) {
return mspace_mallinfo(&g_heaps[0].m); return mspace_mallinfo(g_heaps[0]);
} }
static struct mallinfo dlmallinfo_threaded(void) { static struct mallinfo dlmallinfo_threaded(void) {
@@ -155,7 +144,8 @@ static void use_single_heap(bool uses_locks) {
dlrealloc = dlrealloc_single; dlrealloc = dlrealloc_single;
dlmemalign = dlmemalign_single; dlmemalign = dlmemalign_single;
dlmallinfo = dlmallinfo_single; dlmallinfo = dlmallinfo_single;
init_heap(&g_heaps[0], uses_locks); if (!(g_heaps[0] = create_mspace(0, uses_locks)))
__builtin_trap();
} }
static void threaded_dlmalloc(void) { static void threaded_dlmalloc(void) {
@@ -190,9 +180,10 @@ static void threaded_dlmalloc(void) {
// we need this too due to linux's cpu count affinity hack // we need this too due to linux's cpu count affinity hack
g_heapslen = heaps; g_heapslen = heaps;
// create the heaps // create the arenas
for (size_t i = 0; i < g_heapslen; ++i) for (size_t i = 0; i < g_heapslen; ++i)
init_heap(&g_heaps[i], true); if (!(g_heaps[i] = create_mspace(0, true)))
__builtin_trap();
// install function pointers // install function pointers
dlmalloc = dlmalloc_threaded; dlmalloc = dlmalloc_threaded;
@@ -201,5 +192,5 @@ static void threaded_dlmalloc(void) {
dlmemalign = dlmemalign_threaded; dlmemalign = dlmemalign_threaded;
dlmallinfo = dlmallinfo_threaded; dlmallinfo = dlmallinfo_threaded;
STRACE("created %d dlmalloc heaps for %d cpus", heaps, cpus); STRACE("created %d dlmalloc arenas for %d cpus", heaps, cpus);
} }