diff --git a/examples/hello4.c b/examples/hello4.c new file mode 100644 index 000000000..61334cc25 --- /dev/null +++ b/examples/hello4.c @@ -0,0 +1,17 @@ +#if 0 +/*─────────────────────────────────────────────────────────────────╗ +│ To the extent possible under law, Justine Tunney has waived │ +│ all copyright and related or neighboring rights to this file, │ +│ as it is written in the following disclaimers: │ +│ • http://unlicense.org/ │ +│ • http://creativecommons.org/publicdomain/zero/1.0/ │ +╚─────────────────────────────────────────────────────────────────*/ +#endif +#include "libc/math.h" +#include "libc/stdio/stdio.h" + +int main(int argc, char *argv[]) { + volatile double x = 123; + printf("cos(%g) is %g\n", x, cos(x)); + return 0; +} diff --git a/libc/mem/arena.c b/libc/mem/arena.c index a6cf0168a..63407f810 100644 --- a/libc/mem/arena.c +++ b/libc/mem/arena.c @@ -126,7 +126,7 @@ static dontinline bool __arena_grow(size_t offset, size_t request) { return false; } -static void *__arena_alloc(size_t a, size_t n) { +static inline void *__arena_alloc(size_t a, size_t n) { size_t o; if (!n) n = 1; o = ROUNDUP(__arena.offset[__arena.depth] + sizeof(size_t), a); @@ -299,6 +299,25 @@ static void __arena_init(void) { atexit(__arena_destroy); } +/** + * Pushes memory arena. + * + * This allocator gives a ~3x performance boost over dlmalloc, mostly + * because it isn't thread safe and it doesn't do defragmentation. + * + * Calling this function will push a new arena. It may be called + * multiple times from the main thread recursively. The first time it's + * called, it hooks all the regular memory allocation functions. Any + * allocations that were made previously outside the arena will be + * passed on to the previous hooks. Then the basic idea is that, rather + * than bothering with free(), you can just call __arena_pop() to bulk + * free. + * + * Arena allocations also have a slight size advantage, since 32-bit + * pointers are always used. 
The maximum amount of arena memory is + * 805,175,296 bytes. + * + * @see __arena_pop() + */ void __arena_push(void) { if (UNLIKELY(!__arena.once)) { __arena_init(); @@ -313,6 +332,15 @@ void __arena_push(void) { ++__arena.depth; } +/** + * Pops memory arena. + * + * This pops the most recently created arena, freeing all the memory + * that was allocated between the push and pop arena calls. If this is + * the last arena on the stack, then the old malloc hooks are restored. + * + * @see __arena_push() + */ void __arena_pop(void) { size_t a, b, greed; __arena_check(); diff --git a/libc/x/xloadzd.c b/libc/x/xloadzd.c index 2444b3abf..d8afcb7fb 100644 --- a/libc/x/xloadzd.c +++ b/libc/x/xloadzd.c @@ -20,6 +20,7 @@ #include "libc/fmt/leb128.h" #include "libc/intrin/lockcmpxchg.h" #include "libc/nexgen32e/crc32.h" +#include "libc/runtime/internal.h" #include "libc/runtime/runtime.h" #include "libc/x/x.h" #include "third_party/zlib/zlib.h" @@ -47,16 +48,7 @@ void *xloadzd(bool *o, void **t, const void *p, size_t n, size_t m, size_t c, int64_t x, y; assert(z == 2 || z == 4); b = q = malloc(m); - zs.zfree = 0; - zs.zalloc = 0; - zs.next_in = p; - zs.avail_in = n; - zs.total_in = n; - zs.avail_out = m; - zs.total_out = m; - zs.next_out = (void *)q; - inflateInit2(&zs, -MAX_WBITS); - inflate(&zs, Z_NO_FLUSH); + __inflate(q, m, p, n); r = memalign(z, c * z); for (x = i = 0; i < c; ++i) { b += unzleb64(b, 10, &y); diff --git a/third_party/dlmalloc/dlmalloc.c b/third_party/dlmalloc/dlmalloc.c index 07231df8c..37d68f70b 100644 --- a/third_party/dlmalloc/dlmalloc.c +++ b/third_party/dlmalloc/dlmalloc.c @@ -1,4 +1,5 @@ #include "libc/assert.h" +#include "libc/bits/likely.h" #include "libc/bits/weaken.h" #include "libc/calls/calls.h" #include "libc/dce.h" @@ -22,7 +23,7 @@ #define HAVE_MMAP 1 #define HAVE_MREMAP 0 #define HAVE_MORECORE 0 -#define USE_LOCKS 1 +#define USE_SPIN_LOCKS 1 #define MORECORE_CONTIGUOUS 0 #define MALLOC_INSPECT_ALL 1 @@ -820,12 +821,7 @@ void dlfree(void* 
mem) { void* dlcalloc(size_t n_elements, size_t elem_size) { void* mem; size_t req = 0; - if (n_elements != 0) { - req = n_elements * elem_size; - if (((n_elements | elem_size) & ~(size_t)0xffff) && - (req / n_elements != elem_size)) - req = MAX_SIZE_T; /* force downstream failure on overflow */ - } + if (__builtin_mul_overflow(n_elements, elem_size, &req)) req = -1; mem = dlmalloc(req); if (mem != 0 && calloc_must_clear(mem2chunk(mem))) bzero(mem, req); @@ -1216,7 +1212,7 @@ void* dlrealloc(void* oldmem, size_t bytes) { if (oldmem == 0) { mem = dlmalloc(bytes); } - else if (bytes >= MAX_REQUEST) { + else if (UNLIKELY(bytes >= MAX_REQUEST)) { MALLOC_FAILURE_ACTION; } #ifdef REALLOC_ZERO_BYTES_FREES diff --git a/third_party/dlmalloc/dlmalloc.mk b/third_party/dlmalloc/dlmalloc.mk index 050cbc7e7..516441077 100644 --- a/third_party/dlmalloc/dlmalloc.mk +++ b/third_party/dlmalloc/dlmalloc.mk @@ -50,6 +50,13 @@ $(THIRD_PARTY_DLMALLOC_A).pkg: \ $(THIRD_PARTY_DLMALLOC_A_OBJS) \ $(foreach x,$(THIRD_PARTY_DLMALLOC_A_DIRECTDEPS),$($(x)_A).pkg) +# The README file recommends -O3 +# It roughly doubles performance compared to the default mode +o//third_party/dlmalloc/dlmalloc.o \ +o/rel/third_party/dlmalloc/dlmalloc.o: \ + OVERRIDE_CFLAGS += \ + -O3 + # we can't use address sanitizer because: # address sanitizer depends on dlmalloc o/$(MODE)/third_party/dlmalloc/dlmalloc.o: \