Make dlmalloc a little faster

This change also documents the libc arena allocator.
This commit is contained in:
Justine Tunney 2022-06-09 18:48:15 -07:00
parent fa1e8a3e65
commit a41669dec6
5 changed files with 59 additions and 19 deletions

17
examples/hello4.c Normal file
View file

@ -0,0 +1,17 @@
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to this file,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "libc/math.h"
#include "libc/stdio/stdio.h"
/**
 * Prints the cosine of a sample value.
 *
 * The operand is declared volatile so the compiler cannot constant-fold
 * cos() at build time; the math library is exercised at runtime.
 */
int main(int argc, char *argv[]) {
  volatile double angle = 123;
  double result = cos(angle);
  printf("cos(%g) is %g\n", angle, result);
  return 0;
}

View file

@ -126,7 +126,7 @@ static dontinline bool __arena_grow(size_t offset, size_t request) {
return false; return false;
} }
static void *__arena_alloc(size_t a, size_t n) { static inline void *__arena_alloc(size_t a, size_t n) {
size_t o; size_t o;
if (!n) n = 1; if (!n) n = 1;
o = ROUNDUP(__arena.offset[__arena.depth] + sizeof(size_t), a); o = ROUNDUP(__arena.offset[__arena.depth] + sizeof(size_t), a);
@ -299,6 +299,25 @@ static void __arena_init(void) {
atexit(__arena_destroy); atexit(__arena_destroy);
} }
/**
* Pushes memory arena.
*
* This allocator gives a ~3x performance boost over dlmalloc, mostly
* because it isn't thread safe and it doesn't do defragmentation.
*
* Calling this function will push a new arena. It may be called
* multiple times from the main thread recursively. The first time it's
* called, it hooks all the regular memory allocation functions. Any
* allocations that were made previously outside the arena, will be
passed on to the previous hooks. The basic idea is that, rather than
bothering with free(), you can just call __arena_pop() to bulk free.
*
* Arena allocations also have a slight size advantage, since 32-bit
* pointers are always used. The maximum amount of arena memory is
* 805,175,296 bytes.
*
* @see __arena_pop()
*/
void __arena_push(void) { void __arena_push(void) {
if (UNLIKELY(!__arena.once)) { if (UNLIKELY(!__arena.once)) {
__arena_init(); __arena_init();
@ -313,6 +332,15 @@ void __arena_push(void) {
++__arena.depth; ++__arena.depth;
} }
/**
* Pops memory arena.
*
* This pops the most recently created arena, freeing all the memory
* that was allocated between the push and pop arena calls. If this is
* the last arena on the stack, then the old malloc hooks are restored.
*
* @see __arena_push()
*/
void __arena_pop(void) { void __arena_pop(void) {
size_t a, b, greed; size_t a, b, greed;
__arena_check(); __arena_check();

View file

@ -20,6 +20,7 @@
#include "libc/fmt/leb128.h" #include "libc/fmt/leb128.h"
#include "libc/intrin/lockcmpxchg.h" #include "libc/intrin/lockcmpxchg.h"
#include "libc/nexgen32e/crc32.h" #include "libc/nexgen32e/crc32.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/x/x.h" #include "libc/x/x.h"
#include "third_party/zlib/zlib.h" #include "third_party/zlib/zlib.h"
@ -47,16 +48,7 @@ void *xloadzd(bool *o, void **t, const void *p, size_t n, size_t m, size_t c,
int64_t x, y; int64_t x, y;
assert(z == 2 || z == 4); assert(z == 2 || z == 4);
b = q = malloc(m); b = q = malloc(m);
zs.zfree = 0; __inflate(q, m, p, n);
zs.zalloc = 0;
zs.next_in = p;
zs.avail_in = n;
zs.total_in = n;
zs.avail_out = m;
zs.total_out = m;
zs.next_out = (void *)q;
inflateInit2(&zs, -MAX_WBITS);
inflate(&zs, Z_NO_FLUSH);
r = memalign(z, c * z); r = memalign(z, c * z);
for (x = i = 0; i < c; ++i) { for (x = i = 0; i < c; ++i) {
b += unzleb64(b, 10, &y); b += unzleb64(b, 10, &y);

View file

@ -1,4 +1,5 @@
#include "libc/assert.h" #include "libc/assert.h"
#include "libc/bits/likely.h"
#include "libc/bits/weaken.h" #include "libc/bits/weaken.h"
#include "libc/calls/calls.h" #include "libc/calls/calls.h"
#include "libc/dce.h" #include "libc/dce.h"
@ -22,7 +23,7 @@
#define HAVE_MMAP 1 #define HAVE_MMAP 1
#define HAVE_MREMAP 0 #define HAVE_MREMAP 0
#define HAVE_MORECORE 0 #define HAVE_MORECORE 0
#define USE_LOCKS 1 #define USE_SPIN_LOCKS 1
#define MORECORE_CONTIGUOUS 0 #define MORECORE_CONTIGUOUS 0
#define MALLOC_INSPECT_ALL 1 #define MALLOC_INSPECT_ALL 1
@ -820,12 +821,7 @@ void dlfree(void* mem) {
void* dlcalloc(size_t n_elements, size_t elem_size) { void* dlcalloc(size_t n_elements, size_t elem_size) {
void* mem; void* mem;
size_t req = 0; size_t req = 0;
if (n_elements != 0) { if (__builtin_mul_overflow(n_elements, elem_size, &req)) req = -1;
req = n_elements * elem_size;
if (((n_elements | elem_size) & ~(size_t)0xffff) &&
(req / n_elements != elem_size))
req = MAX_SIZE_T; /* force downstream failure on overflow */
}
mem = dlmalloc(req); mem = dlmalloc(req);
if (mem != 0 && calloc_must_clear(mem2chunk(mem))) if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
bzero(mem, req); bzero(mem, req);
@ -1216,7 +1212,7 @@ void* dlrealloc(void* oldmem, size_t bytes) {
if (oldmem == 0) { if (oldmem == 0) {
mem = dlmalloc(bytes); mem = dlmalloc(bytes);
} }
else if (bytes >= MAX_REQUEST) { else if (UNLIKELY(bytes >= MAX_REQUEST)) {
MALLOC_FAILURE_ACTION; MALLOC_FAILURE_ACTION;
} }
#ifdef REALLOC_ZERO_BYTES_FREES #ifdef REALLOC_ZERO_BYTES_FREES

View file

@ -50,6 +50,13 @@ $(THIRD_PARTY_DLMALLOC_A).pkg: \
$(THIRD_PARTY_DLMALLOC_A_OBJS) \ $(THIRD_PARTY_DLMALLOC_A_OBJS) \
$(foreach x,$(THIRD_PARTY_DLMALLOC_A_DIRECTDEPS),$($(x)_A).pkg) $(foreach x,$(THIRD_PARTY_DLMALLOC_A_DIRECTDEPS),$($(x)_A).pkg)
# README file recommends -O3
# It doubles performance in the default mode
o//third_party/dlmalloc/dlmalloc.o \
o/rel/third_party/dlmalloc/dlmalloc.o: \
OVERRIDE_CFLAGS += \
-O3
# we can't use address sanitizer because: # we can't use address sanitizer because:
# address sanitizer depends on dlmalloc # address sanitizer depends on dlmalloc
o/$(MODE)/third_party/dlmalloc/dlmalloc.o: \ o/$(MODE)/third_party/dlmalloc/dlmalloc.o: \