mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-24 14:22:28 +00:00
Make dlmalloc a little faster
This change also documents the libc arena allocator.
This commit is contained in:
parent
fa1e8a3e65
commit
a41669dec6
5 changed files with 59 additions and 19 deletions
17
examples/hello4.c
Normal file
17
examples/hello4.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
#if 0
|
||||
/*─────────────────────────────────────────────────────────────────╗
|
||||
│ To the extent possible under law, Justine Tunney has waived │
|
||||
│ all copyright and related or neighboring rights to this file, │
|
||||
│ as it is written in the following disclaimers: │
|
||||
│ • http://unlicense.org/ │
|
||||
│ • http://creativecommons.org/publicdomain/zero/1.0/ │
|
||||
╚─────────────────────────────────────────────────────────────────*/
|
||||
#endif
|
||||
#include "libc/math.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
|
||||
/**
 * Demo program: computes the cosine of a runtime value.
 *
 * The operand is declared volatile so the compiler cannot
 * constant-fold cos() at build time, forcing a real call
 * into the math library.
 */
int main(int argc, char *argv[]) {
  volatile double angle = 123;
  printf("cos(%g) is %g\n", angle, cos(angle));
  return 0;
}
|
|
@ -126,7 +126,7 @@ static dontinline bool __arena_grow(size_t offset, size_t request) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static void *__arena_alloc(size_t a, size_t n) {
|
||||
static inline void *__arena_alloc(size_t a, size_t n) {
|
||||
size_t o;
|
||||
if (!n) n = 1;
|
||||
o = ROUNDUP(__arena.offset[__arena.depth] + sizeof(size_t), a);
|
||||
|
@ -299,6 +299,25 @@ static void __arena_init(void) {
|
|||
atexit(__arena_destroy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Pushes memory arena.
|
||||
*
|
||||
* This allocator gives a ~3x performance boost over dlmalloc, mostly
|
||||
* because it isn't thread safe and it doesn't do defragmentation.
|
||||
*
|
||||
* Calling this function will push a new arena. It may be called
|
||||
* multiple times from the main thread recursively. The first time it's
|
||||
* called, it hooks all the regular memory allocation functions. Any
|
||||
* allocations that were made previously outside the arena, will be
|
||||
* passed on to the previous hooks. Then, the basic idea, is rather than
|
||||
* bothering with free() you can just call __arena_pop() to bulk free.
|
||||
*
|
||||
* Arena allocations also have a slight size advantage, since 32-bit
|
||||
* pointers are always used. The maximum amount of arena memory is
|
||||
* 805,175,296 bytes.
|
||||
*
|
||||
* @see __arena_pop()
|
||||
*/
|
||||
void __arena_push(void) {
|
||||
if (UNLIKELY(!__arena.once)) {
|
||||
__arena_init();
|
||||
|
@ -313,6 +332,15 @@ void __arena_push(void) {
|
|||
++__arena.depth;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pops memory arena.
|
||||
*
|
||||
* This pops the most recently created arena, freeing all the memory
|
||||
* that was allocated between the push and pop arena calls. If this is
|
||||
* the last arena on the stack, then the old malloc hooks are restored.
|
||||
*
|
||||
* @see __arena_push()
|
||||
*/
|
||||
void __arena_pop(void) {
|
||||
size_t a, b, greed;
|
||||
__arena_check();
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "libc/fmt/leb128.h"
|
||||
#include "libc/intrin/lockcmpxchg.h"
|
||||
#include "libc/nexgen32e/crc32.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/x/x.h"
|
||||
#include "third_party/zlib/zlib.h"
|
||||
|
@ -47,16 +48,7 @@ void *xloadzd(bool *o, void **t, const void *p, size_t n, size_t m, size_t c,
|
|||
int64_t x, y;
|
||||
assert(z == 2 || z == 4);
|
||||
b = q = malloc(m);
|
||||
zs.zfree = 0;
|
||||
zs.zalloc = 0;
|
||||
zs.next_in = p;
|
||||
zs.avail_in = n;
|
||||
zs.total_in = n;
|
||||
zs.avail_out = m;
|
||||
zs.total_out = m;
|
||||
zs.next_out = (void *)q;
|
||||
inflateInit2(&zs, -MAX_WBITS);
|
||||
inflate(&zs, Z_NO_FLUSH);
|
||||
__inflate(q, m, p, n);
|
||||
r = memalign(z, c * z);
|
||||
for (x = i = 0; i < c; ++i) {
|
||||
b += unzleb64(b, 10, &y);
|
||||
|
|
12
third_party/dlmalloc/dlmalloc.c
vendored
12
third_party/dlmalloc/dlmalloc.c
vendored
|
@ -1,4 +1,5 @@
|
|||
#include "libc/assert.h"
|
||||
#include "libc/bits/likely.h"
|
||||
#include "libc/bits/weaken.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/dce.h"
|
||||
|
@ -22,7 +23,7 @@
|
|||
#define HAVE_MMAP 1
|
||||
#define HAVE_MREMAP 0
|
||||
#define HAVE_MORECORE 0
|
||||
#define USE_LOCKS 1
|
||||
#define USE_SPIN_LOCKS 1
|
||||
#define MORECORE_CONTIGUOUS 0
|
||||
#define MALLOC_INSPECT_ALL 1
|
||||
|
||||
|
@ -820,12 +821,7 @@ void dlfree(void* mem) {
|
|||
void* dlcalloc(size_t n_elements, size_t elem_size) {
|
||||
void* mem;
|
||||
size_t req = 0;
|
||||
if (n_elements != 0) {
|
||||
req = n_elements * elem_size;
|
||||
if (((n_elements | elem_size) & ~(size_t)0xffff) &&
|
||||
(req / n_elements != elem_size))
|
||||
req = MAX_SIZE_T; /* force downstream failure on overflow */
|
||||
}
|
||||
if (__builtin_mul_overflow(n_elements, elem_size, &req)) req = -1;
|
||||
mem = dlmalloc(req);
|
||||
if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
|
||||
bzero(mem, req);
|
||||
|
@ -1216,7 +1212,7 @@ void* dlrealloc(void* oldmem, size_t bytes) {
|
|||
if (oldmem == 0) {
|
||||
mem = dlmalloc(bytes);
|
||||
}
|
||||
else if (bytes >= MAX_REQUEST) {
|
||||
else if (UNLIKELY(bytes >= MAX_REQUEST)) {
|
||||
MALLOC_FAILURE_ACTION;
|
||||
}
|
||||
#ifdef REALLOC_ZERO_BYTES_FREES
|
||||
|
|
7
third_party/dlmalloc/dlmalloc.mk
vendored
7
third_party/dlmalloc/dlmalloc.mk
vendored
|
@ -50,6 +50,13 @@ $(THIRD_PARTY_DLMALLOC_A).pkg: \
|
|||
$(THIRD_PARTY_DLMALLOC_A_OBJS) \
|
||||
$(foreach x,$(THIRD_PARTY_DLMALLOC_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
# README file recommends -O3
|
||||
# It roughly doubles performance in the default mode
|
||||
o//third_party/dlmalloc/dlmalloc.o \
|
||||
o/rel/third_party/dlmalloc/dlmalloc.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-O3
|
||||
|
||||
# we can't use address sanitizer because:
|
||||
# address sanitizer depends on dlmalloc
|
||||
o/$(MODE)/third_party/dlmalloc/dlmalloc.o: \
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue