Make bulk_free() go faster

This commit is contained in:
Justine Tunney 2024-12-23 20:14:01 -08:00
parent 624573207e
commit c8e10eef30
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
13 changed files with 54 additions and 36 deletions

1
.gitignore vendored
View file

@ -15,3 +15,4 @@ __pycache__
/tool/emacs/*.elc
/perf.data
/perf.data.old
/qemu*core

View file

@ -25,8 +25,8 @@ int cosmo_futex_wake(_COSMO_ATOMIC(int) *, int, char);
int cosmo_futex_wait(_COSMO_ATOMIC(int) *, int, char, int,
const struct timespec *);
errno_t cosmo_stack_alloc(size_t *, size_t *, void **) libcesque;
errno_t cosmo_stack_free(void *, size_t, size_t) libcesque;
errno_t cosmo_stack_alloc(unsigned *, unsigned *, void **) libcesque;
errno_t cosmo_stack_free(void *, unsigned, unsigned) libcesque;
void cosmo_stack_clear(void) libcesque;
void cosmo_stack_setmaxstacks(int) libcesque;
int cosmo_stack_getmaxstacks(void) libcesque;

View file

@ -42,8 +42,8 @@
// Record for a single stack: a struct Dll list element, the stack's
// address, and its stack and guard sizes.
//
// NOTE(review): this view interleaves removed and added lines without
// +/- markers. The size_t pair and the unsigned pair look like the
// before/after declarations of the same two fields, not four distinct
// members -- confirm against the real file.
struct CosmoStack {
struct Dll elem;
void *stackaddr;
size_t stacksize;
size_t guardsize;
unsigned stacksize;
unsigned guardsize;
};
struct CosmoStacks {
@ -215,13 +215,13 @@ void cosmo_stack_setmaxstacks(int maxstacks) {
* This function returns 0 on success, or an errno on error. See the
* documentation of mmap() for a list of possible errors that may occur.
*/
errno_t cosmo_stack_alloc(size_t *inout_stacksize, //
size_t *inout_guardsize, //
errno_t cosmo_stack_alloc(unsigned *inout_stacksize, //
unsigned *inout_guardsize, //
void **out_addr) {
// validate arguments
size_t stacksize = *inout_stacksize;
size_t guardsize = *inout_guardsize;
unsigned stacksize = *inout_stacksize;
unsigned guardsize = *inout_guardsize;
stacksize = (stacksize + __gransize - 1) & -__gransize;
guardsize = (guardsize + __pagesize - 1) & -__pagesize;
if (guardsize + __pagesize > stacksize)
@ -283,7 +283,8 @@ static void cosmo_stack_setup(void) {
* variable is never clobbered. You can only dependably count on this to
* return an error on failure when you say `cosmo_stack_setmaxstacks(0)`
*/
errno_t cosmo_stack_free(void *stackaddr, size_t stacksize, size_t guardsize) {
errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize,
unsigned guardsize) {
stacksize = (stacksize + __gransize - 1) & -__gransize;
guardsize = (guardsize + __pagesize - 1) & -__pagesize;
if (guardsize + __pagesize > stacksize)

View file

@ -35,8 +35,8 @@
*/
void *NewCosmoStack(void) {
void *stackaddr;
size_t stacksize = GetStackSize();
size_t guardsize = GetGuardSize();
unsigned stacksize = GetStackSize();
unsigned guardsize = GetGuardSize();
errno_t err = cosmo_stack_alloc(&stacksize, &guardsize, &stackaddr);
if (!err)
return stackaddr;

View file

@ -78,8 +78,7 @@ struct PosixThread {
atomic_int ptid; // transitions 0 → tid
atomic_int pt_refs; // prevents decimation
void *(*pt_start)(void *); // creation callback
void *pt_arg; // start's parameter
void *pt_rc; // start's return value
void *pt_val; // start param / return val
char *pt_tls; // bottom of tls allocation
struct CosmoTib *tib; // middle of tls allocation
struct Dll list; // list of threads
@ -105,7 +104,6 @@ int _pthread_tid(struct PosixThread *) libcesque;
intptr_t _pthread_syshand(struct PosixThread *) libcesque;
long _pthread_cancel_ack(void) libcesque;
void _pthread_decimate(void) libcesque;
void _pthread_free(struct PosixThread *) libcesque;
void _pthread_lock(void) libcesque;
void _pthread_onfork_child(void) libcesque;
void _pthread_onfork_parent(void) libcesque;

View file

@ -63,11 +63,13 @@ static void _pthread_onfork(int i, const char *op) {
}
// Fork "prepare" step: acquires _atforks.lock, then calls
// _pthread_onfork() with index 0 and the label "prepare".
//
// The lock is deliberately still held on return; the matching unlock
// appears in _pthread_onfork_parent().
void _pthread_onfork_prepare(void) {
pthread_mutex_lock(&_atforks.lock);
_pthread_onfork(0, "prepare");
}
// Fork "parent" step: calls _pthread_onfork() with index 1 and the
// label "parent", then releases _atforks.lock.
//
// The matching lock call appears in _pthread_onfork_prepare().
void _pthread_onfork_parent(void) {
_pthread_onfork(1, "parent");
pthread_mutex_unlock(&_atforks.lock);
}
void _pthread_onfork_child(void) {

View file

@ -67,7 +67,7 @@ __static_yoink("_pthread_onfork_prepare");
__static_yoink("_pthread_onfork_parent");
__static_yoink("_pthread_onfork_child");
void _pthread_free(struct PosixThread *pt) {
static void _pthread_free(struct PosixThread *pt) {
// thread must be removed from _pthread_list before calling
unassert(dll_is_alone(&pt->list) && &pt->list != _pthread_list);
@ -93,10 +93,13 @@ void _pthread_free(struct PosixThread *pt) {
}
// free heap memory associated with thread
if (pt->pt_flags & PT_OWNSIGALTSTACK)
free(pt->pt_attr.__sigaltstackaddr);
free(pt->pt_tls);
free(pt);
bulk_free(
(void *[]){
pt->pt_flags & PT_OWNSIGALTSTACK ? pt->pt_attr.__sigaltstackaddr : 0,
pt->pt_tls,
pt,
},
3);
}
void _pthread_decimate(void) {
@ -137,7 +140,6 @@ void _pthread_decimate(void) {
}
static int PosixThread(void *arg, int tid) {
void *rc;
struct PosixThread *pt = arg;
// setup scheduling
@ -167,11 +169,11 @@ static int PosixThread(void *arg, int tid) {
} else {
sys_sigprocmask(SIG_SETMASK, &pt->pt_attr.__sigmask, 0);
}
rc = pt->pt_start(pt->pt_arg);
void *ret = pt->pt_start(pt->pt_val);
// ensure pthread_cleanup_pop(), and pthread_exit() popped cleanup
unassert(!pt->pt_cleanup);
// calling pthread_exit() will either jump back here, or call exit
pthread_exit(rc);
pthread_exit(ret);
}
// avoid signal handler being triggered after we trash our own stack
@ -196,7 +198,7 @@ static errno_t pthread_create_impl(pthread_t *thread,
dll_init(&pt->list);
pt->pt_locale = &__global_locale;
pt->pt_start = start_routine;
pt->pt_arg = arg;
pt->pt_val = arg;
// create thread local storage memory
if (!(pt->pt_tls = _mktls(&pt->tib))) {

View file

@ -88,7 +88,7 @@ wontreturn void pthread_exit(void *rc) {
// set state
pt->pt_flags |= PT_NOCANCEL | PT_EXITING;
pt->pt_rc = rc;
pt->pt_val = rc;
// free resources
__cxa_thread_finalize();

View file

@ -139,7 +139,7 @@ errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr,
memory_order_release);
_pthread_zombify(pt);
if (value_ptr)
*value_ptr = pt->pt_rc;
*value_ptr = pt->pt_val;
}
_pthread_unref(pt);

View file

@ -130,8 +130,8 @@ typedef struct pthread_attr_s {
int __contentionscope;
int __sigaltstacksize;
uint64_t __sigmask;
size_t __guardsize;
size_t __stacksize;
unsigned __guardsize;
unsigned __stacksize;
void *__stackaddr;
void *__sigaltstackaddr;
} pthread_attr_t;

View file

@ -22,7 +22,6 @@
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/cxaatexit.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/safemacros.h"
#include "libc/macros.h"
#include "libc/mem/gc.h"
@ -162,7 +161,7 @@ void *bulk[1024];
// Fills every slot of the global bulk[] array with pointers for the
// bulk free benchmark.
//
// NOTE(review): this view interleaves removed and added lines without
// +/- markers. The two assignments to bulk[i] look like the before and
// after versions of one statement; read literally, the first result
// would be overwritten and leaked -- confirm against the real file.
void BulkFreeBenchSetup(void) {
size_t i;
for (i = 0; i < ARRAYLEN(bulk); ++i) {
// unconditional allocation of rand() % 64 (0..63) bytes
bulk[i] = malloc(rand() % 64);
// leaves the slot null when rand() % 64 is zero; otherwise allocates
// rand() % 64 bytes using a second, independent rand() draw
bulk[i] = rand() % 64 ? malloc(rand() % 64) : 0;
}
}

View file

@ -62,11 +62,6 @@
#include "locks.inc"
#include "chunks.inc"
#include "headfoot.inc"
#if ONLY_MSPACES
#include "threaded.inc"
#endif
#include "global.inc"
#include "system.inc"
#include "hooks.inc"
@ -74,6 +69,11 @@
#include "indexing.inc"
#include "binmaps.inc"
#include "runtimechecks.inc"
#if ONLY_MSPACES
#include "threaded.inc"
#endif
#include "init.inc"
#include "debuglib.inc"
#include "statistics.inc"

View file

@ -61,8 +61,23 @@ int dlmalloc_trim(size_t pad) {
}
// Releases the pointers in array[0..nelem) and always returns 0.
//
// Non-null entries are grouped into runs of consecutive pointers that
// map to the same mstate, and each run is handed to mspace_bulk_free()
// in a single call. The caller's array is modified in place, and the
// return value of mspace_bulk_free() is ignored.
//
// NOTE(review): this view interleaves removed and added lines without
// +/- markers. The two-line mspace_free() loop directly below looks like
// the pre-change body and the rest like its replacement; read literally,
// both would run and every pointer would be freed twice -- confirm
// against the real file.
size_t dlbulk_free(void *array[], size_t nelem) {
for (size_t i = 0; i < nelem; ++i)
mspace_free(0, array[i]);
// j counts the pointers gathered for the current mstate; they are
// compacted into array[0..j), reusing slots that were already visited
size_t j = 0;
// sentinel: no mstate has been seen yet
mstate msp = (mstate)-1;
for (size_t i = 0; i < nelem; ++i) {
mstate next;
// null entries are skipped entirely
if (array[i]) {
next = get_mstate_for(mem2chunk(array[i]));
// different mstate than the current run: flush what was gathered
// (if anything) and start a new run
if (next != msp) {
if (j)
mspace_bulk_free(msp, array, j);
msp = next;
j = 0;
}
// j <= i at this point, so an unvisited slot is never overwritten
array[j++] = array[i];
}
}
// flush the final run
if (j)
mspace_bulk_free(msp, array, j);
return 0;
}