Make dlmalloc 2.4x faster for multithreading

This change adds a TLS freelist for small dynamic memory allocations.
Cosmopolitan's TIB is now 512 bytes in size. Single-threaded malloc()
performance isn't impacted by this, until pthread_create() is called.
Single-threaded programs may also want to consider using:

    #include "libc/mem/tinymalloc.inc"

Doing so will shave 30k off the executable size and can sometimes run faster.
This commit is contained in:
Justine Tunney 2024-05-28 11:13:12 -07:00
parent deaef81463
commit 07cef612c3
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
9 changed files with 150 additions and 6 deletions

View file

@@ -15,6 +15,7 @@ struct CosmoFtrace { /* 16 */
int64_t ft_lastaddr; /* 8 */
};
/* cosmopolitan thread information block (512 bytes) */
/* NOTE: update aarch64 libc/errno.h if sizeof changes */
/* NOTE: update aarch64 libc/proc/vfork.S if sizeof changes */
/* NOTE: update aarch64 libc/nexgen32e/gc.S if sizeof changes */
@@ -38,7 +39,8 @@ struct CosmoTib {
uint32_t tib_sigstack_flags;
void **tib_keys;
void *tib_nsync;
void *tib_todo[7];
unsigned short tib_freelen[32];
void *tib_freemem[32];
} __attribute__((__aligned__(64)));
extern int __threaded;