mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-06-28 15:28:30 +00:00
Make dlmalloc 2.4x faster for multithreading
This change adds a TLS freelist for small dynamic memory allocations. Cosmopolitan's TIB is now 512 bytes in size. Single-threaded malloc() performance isn't impacted by this until pthread_create() is called. Single-threaded programs may also want to consider using #include "libc/mem/tinymalloc.inc", which will shave 30k off the executable size and sometimes go faster.
This commit is contained in:
parent
deaef81463
commit
07cef612c3
9 changed files with 150 additions and 6 deletions
|
@ -29,7 +29,7 @@ COSMOPOLITAN_C_START_
|
|||
#define errno \
|
||||
(*__extension__({ \
|
||||
errno_t *__ep; \
|
||||
__asm__("sub\t%0,x28,#192-0x3c" : "=r"(__ep)); \
|
||||
__asm__("sub\t%0,x28,#512-0x3c" : "=r"(__ep)); \
|
||||
__ep; \
|
||||
}))
|
||||
#else
|
||||
|
|
|
@ -66,7 +66,7 @@ __gc: .ftrace2
|
|||
|
||||
// if this code fails
|
||||
// check if CosmoTib's size changed
|
||||
sub x8,x28,#192 // __get_tls()
|
||||
sub x8,x28,#512 // __get_tls()
|
||||
ldr x9,[x8,0x18] // tib::garbages
|
||||
ldr x10,[x9] // g->i
|
||||
ldr x8,[x9,8] // g->p
|
||||
|
|
|
@ -121,7 +121,7 @@ vfork:
|
|||
// } else {
|
||||
// __get_tls()->tib_flags &= ~TIB_FLAG_VFORKED;
|
||||
// }
|
||||
sub x1,x28,#192 // sizeof(CosmoTib)
|
||||
sub x1,x28,#512 // sizeof(CosmoTib)
|
||||
ldr x2,[x1,64]
|
||||
cbnz x0,2f
|
||||
orr x2,x2,#TIB_FLAG_VFORKED
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/thread/posixthread.internal.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "libc/thread/tls.h"
|
||||
|
@ -130,6 +131,23 @@ wontreturn void pthread_exit(void *rc) {
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef MODE_DBG
|
||||
// free tls freelist
|
||||
//
|
||||
// 1. set lengths to -1 so free() thinks it's full
|
||||
// 2. free globally by giving mallocs back to free
|
||||
//
|
||||
short freelen[32];
|
||||
static_assert(sizeof(freelen) == sizeof(tib->tib_freelen), "");
|
||||
memcpy(freelen, tib->tib_freelen, sizeof(freelen));
|
||||
memset(tib->tib_freelen, -1, sizeof(freelen));
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
if (freelen[i] > 0) {
|
||||
free(tib->tib_freemem[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// transition the thread to a terminated state
|
||||
status = atomic_load_explicit(&pt->pt_status, memory_order_acquire);
|
||||
do {
|
||||
|
|
|
@ -15,6 +15,7 @@ struct CosmoFtrace { /* 16 */
|
|||
int64_t ft_lastaddr; /* 8 */
|
||||
};
|
||||
|
||||
/* cosmopolitan thread information block (512 bytes) */
|
||||
/* NOTE: update aarch64 libc/errno.h if sizeof changes */
|
||||
/* NOTE: update aarch64 libc/proc/vfork.S if sizeof changes */
|
||||
/* NOTE: update aarch64 libc/nexgen32e/gc.S if sizeof changes */
|
||||
|
@ -38,7 +39,8 @@ struct CosmoTib {
|
|||
uint32_t tib_sigstack_flags;
|
||||
void **tib_keys;
|
||||
void *tib_nsync;
|
||||
void *tib_todo[7];
|
||||
unsigned short tib_freelen[32];
|
||||
void *tib_freemem[32];
|
||||
} __attribute__((__aligned__(64)));
|
||||
|
||||
extern int __threaded;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue