diff --git a/examples/BUILD.mk b/examples/BUILD.mk index 3b868d186..5ebceb991 100644 --- a/examples/BUILD.mk +++ b/examples/BUILD.mk @@ -91,6 +91,7 @@ EXAMPLES_DIRECTDEPS = \ THIRD_PARTY_TZ \ THIRD_PARTY_VQSORT \ THIRD_PARTY_XED \ + THIRD_PARTY_LIBCXXABI \ THIRD_PARTY_ZLIB \ TOOL_ARGS \ TOOL_BUILD_LIB \ diff --git a/examples/package/BUILD.mk b/examples/package/BUILD.mk index 959b68469..6e387b8d8 100644 --- a/examples/package/BUILD.mk +++ b/examples/package/BUILD.mk @@ -42,6 +42,7 @@ EXAMPLES_PACKAGE_OBJS = \ EXAMPLES_PACKAGE_DIRECTDEPS = \ EXAMPLES_PACKAGE_LIB \ LIBC_INTRIN \ + LIBC_MEM \ LIBC_STDIO \ LIBC_TINYMATH diff --git a/examples/package/lib/BUILD.mk b/examples/package/lib/BUILD.mk index 3fdbe344d..5b53f06dc 100644 --- a/examples/package/lib/BUILD.mk +++ b/examples/package/lib/BUILD.mk @@ -71,6 +71,7 @@ EXAMPLES_PACKAGE_LIB_A_CHECKS = \ EXAMPLES_PACKAGE_LIB_A_DIRECTDEPS = \ LIBC_INTRIN \ LIBC_NEXGEN32E \ + LIBC_MEM \ LIBC_STDIO # Evaluates variable as set of transitive package dependencies. diff --git a/libc/calls/getcpu.c b/libc/calls/getcpu.c index 900a04ec3..bdc97089e 100644 --- a/libc/calls/getcpu.c +++ b/libc/calls/getcpu.c @@ -48,20 +48,11 @@ int getcpu(unsigned *out_opt_cpu, unsigned *out_opt_node) { } else { return __winerr(); } - } else if (IsXnuSilicon()) { - if (__syslib->__version >= 9) { - size_t cpu64; - errno_t err = __syslib->__pthread_cpu_number_np(&cpu64); - if (!err) { - cpu = cpu64; - node = 0; - } else { - errno = err; - return -1; - } - } else { - return enosys(); - } + } else if (IsAarch64()) { + long tpidr_el0; + asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0)); + cpu = tpidr_el0 & 255; + node = 0; } else { int rc = sys_getcpu(&cpu, &node, 0); if (rc == -1) diff --git a/libc/calls/sched_getcpu.c b/libc/calls/sched_getcpu.c index f738be649..12a0a832b 100644 --- a/libc/calls/sched_getcpu.c +++ b/libc/calls/sched_getcpu.c @@ -17,14 +17,12 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/calls.h" -#include "libc/calls/struct/cpuset.h" #include "libc/dce.h" #include "libc/errno.h" #include "libc/nexgen32e/rdtscp.h" #include "libc/nexgen32e/x86feature.h" #include "libc/nt/struct/processornumber.h" #include "libc/nt/synchronization.h" -#include "libc/runtime/syslib.internal.h" #include "libc/sysv/errfuns.h" int sys_getcpu(unsigned *opt_cpu, unsigned *opt_node, void *tcache); @@ -38,23 +36,14 @@ int sched_getcpu(void) { unsigned tsc_aux; rdtscp(&tsc_aux); return TSC_AUX_CORE(tsc_aux); + } else if (IsAarch64()) { + long tpidr_el0; + asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0)); + return tpidr_el0 & 255; } else if (IsWindows()) { struct NtProcessorNumber pn; GetCurrentProcessorNumberEx(&pn); return 64 * pn.Group + pn.Number; - } else if (IsXnuSilicon()) { - if (__syslib->__version >= 9) { - size_t cpu; - errno_t err = __syslib->__pthread_cpu_number_np(&cpu); - if (!err) { - return cpu; - } else { - errno = err; - return -1; - } - } else { - return enosys(); - } } else { unsigned cpu = 0; int rc = sys_getcpu(&cpu, 0, 0); diff --git a/libc/dlopen/BUILD.mk b/libc/dlopen/BUILD.mk index 4db5aa081..2c254ca8d 100644 --- a/libc/dlopen/BUILD.mk +++ b/libc/dlopen/BUILD.mk @@ -54,6 +54,8 @@ $(LIBC_DLOPEN_A_OBJS): private \ o/$(MODE)/libc/dlopen/foreign_tramp.o: libc/dlopen/foreign_tramp.S @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< +$(LIBC_DLOPEN_A_OBJS): private COPTS += -fno-sanitize=address + LIBC_DLOPEN_LIBS = $(foreach x,$(LIBC_DLOPEN_ARTIFACTS),$($(x))) LIBC_DLOPEN_SRCS = $(foreach x,$(LIBC_DLOPEN_ARTIFACTS),$($(x)_SRCS)) LIBC_DLOPEN_HDRS = $(foreach x,$(LIBC_DLOPEN_ARTIFACTS),$($(x)_HDRS)) diff --git a/libc/fmt/BUILD.mk b/libc/fmt/BUILD.mk index 4114c6ba7..2ff357eb9 100644 --- a/libc/fmt/BUILD.mk +++ b/libc/fmt/BUILD.mk @@ -55,7 +55,8 @@ $(LIBC_FMT_A).pkg: \ $(LIBC_FMT_A_OBJS): private \ CFLAGS += \ - -fno-jump-tables + -fno-jump-tables \ + -fno-sanitize=address o/$(MODE)/libc/fmt/formatint64.o \ o/$(MODE)/libc/fmt/formatint64thousands.o \ diff --git a/libc/intrin/BUILD.mk b/libc/intrin/BUILD.mk index 958752e65..dcb05dfea 100644 --- a/libc/intrin/BUILD.mk +++ b/libc/intrin/BUILD.mk @@ -86,13 +86,6 @@ o//libc/intrin/memmove.o: private \ -finline \ -foptimize-sibling-calls -# make asan stack traces shorter -o/$(MODE)/libc/intrin/asanthunk.o: private \ - CFLAGS += \ - -Os \ - $(NO_MAGIC) \ - -foptimize-sibling-calls - o/$(MODE)/libc/intrin/bzero.o \ o/$(MODE)/libc/intrin/memcmp.o \ o/$(MODE)/libc/intrin/memmove.o: private \ diff --git a/libc/intrin/asan.c b/libc/intrin/asan.c index 355a63fec..0f0d9b418 100644 --- a/libc/intrin/asan.c +++ b/libc/intrin/asan.c @@ -35,7 +35,6 @@ #include "libc/log/libfatal.internal.h" #include "libc/log/log.h" #include "libc/macros.internal.h" -#include "libc/mem/hook.internal.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/gc.internal.h" #include "libc/nexgen32e/stackframe.h" @@ -56,25 +55,10 @@ #include "libc/thread/tls.h" #include "third_party/dlmalloc/dlmalloc.h" #ifdef __x86_64__ +#ifdef __SANITIZE_ADDRESS__ __static_yoink("_init_asan"); -#if IsModeDbg() -// MODE=dbg -// O(32mb) of morgue memory -// Θ(64) bytes of malloc overhead -#define ASAN_MORGUE_ITEMS 512 -#define ASAN_MORGUE_THRESHOLD 65536 -#define ASAN_TRACE_ITEMS 16 -#else -// MODE=asan -// O(32mb) of morgue memory -// Θ(32) bytes of malloc overhead -#define ASAN_MORGUE_ITEMS 512 -#define ASAN_MORGUE_THRESHOLD 65536 -#define ASAN_TRACE_ITEMS 4 -#endif - /** * @fileoverview Cosmopolitan Address Sanitizer Runtime. * @@ -110,13 +94,6 @@ __static_yoink("_init_asan"); #define ASAN_LOG(...) (void)0 // kprintf(__VA_ARGS__) -#define HOOK(HOOK, IMPL) \ - do { \ - if (_weaken(HOOK)) { \ - *_weaken(HOOK) = IMPL; \ - } \ - } while (0) - #define REQUIRE(FUNC) \ do { \ if (!_weaken(FUNC)) { \ @@ -124,15 +101,6 @@ __static_yoink("_init_asan"); } \ } while (0) -struct AsanTrace { - uint32_t p[ASAN_TRACE_ITEMS]; // assumes linkage into 32-bit space -}; - -struct AsanExtra { - uint64_t size; - struct AsanTrace bt; -}; - struct AsanSourceLocation { char *filename; int line; @@ -158,16 +126,6 @@ struct AsanGlobal { char *odr_indicator; }; -struct ReportOriginHeap { - const unsigned char *a; - int z; -}; - -static struct AsanMorgue { - _Atomic(unsigned) i; - _Atomic(void *) p[ASAN_MORGUE_ITEMS]; -} __asan_morgue; - int __asan_option_detect_stack_use_after_return = 0; void __asan_version_mismatch_check_v8(void) { @@ -180,26 +138,11 @@ static bool __asan_once(void) { &once, &want, true, memory_order_relaxed, memory_order_relaxed); } -#define __asan_unreachable() \ - do { \ - kprintf("%s:%d: __asan_unreachable()\n", __FILE__, __LINE__); \ - __builtin_trap(); \ - } while (0) - static int __asan_bsf(uint64_t x) { _Static_assert(sizeof(long long) == sizeof(uint64_t), ""); return __builtin_ctzll(x); } -static int __asan_bsr(uint64_t x) { - _Static_assert(sizeof(long long) == sizeof(uint64_t), ""); - return __builtin_clzll(x) ^ 63; -} - -static uint64_t __asan_roundup2pow(uint64_t x) { - return 2ull << __asan_bsr(x - 1); -} - static char *__asan_utf8cpy(char *p, unsigned c) { uint64_t z; z = tpenc(c); @@ -356,7 +299,7 @@ static void __asan_exit(void) { _Exit(99); } -static __wur __asan_die_f *__asan_die(void) { +__wur __asan_die_f *__asan_die(void) { if (_weaken(__die)) { return _weaken(__die); } else { @@ -410,7 +353,7 @@ void __asan_unpoison(void *p, long n) { } } -static bool __asan_is_mapped(int x) { +bool __asan_is_mapped(int x) { // xxx: we can't lock because no reentrant locks yet int i; bool res; @@ -724,12 +667,6 @@ static const char *__asan_describe_access_poison(signed char kind) { } } -static __wur __asan_die_f *__asan_report_invalid_pointer(const void *addr) { - kprintf("\n\e[J\e[1;31masan error\e[0m: this corruption at %p shadow %p\n", - addr, SHADOW(addr)); - return __asan_die(); -} - static char *__asan_format_interval(char *p, intptr_t a, intptr_t b) { p = __asan_hexcpy(p, a, 48), *p++ = '-'; p = __asan_hexcpy(p, b, 48); @@ -750,7 +687,7 @@ static char *__asan_format_section(char *p, const void *p1, const void *p2, return p; } -static void __asan_report_memory_origin_image(intptr_t a, int z) { +void __asan_report_memory_origin_image(intptr_t a, int z) { unsigned l, m, r, n, k; struct SymbolTable *st; kprintf("\nthe memory belongs to image symbols\n"); @@ -787,32 +724,8 @@ static void __asan_report_memory_origin_image(intptr_t a, int z) { } } -static void __asan_onmemory(void *x, void *y, size_t n, void *a) { - const unsigned char *p = x; - struct ReportOriginHeap *t = a; - if ((p <= t->a && t->a < p + n) || - (p <= t->a + t->z && t->a + t->z < p + n) || - (t->a < p && p + n <= t->a + t->z)) { - kprintf("%p %,lu bytes [dlmalloc]", x, n); - __asan_print_trace(x); - kprintf("\n"); - } -} - -static void __asan_report_memory_origin_heap(const unsigned char *a, int z) { - struct ReportOriginHeap t; - kprintf("\nthe memory was allocated by\n"); - if (_weaken(malloc_inspect_all)) { - t.a = a; - t.z = z; - _weaken(malloc_inspect_all)(__asan_onmemory, &t); - } else { - kprintf("\tunknown please __static_yoink(\"malloc_inspect_all\");\n"); - } -} - -static void __asan_report_memory_origin(const unsigned char *addr, int size, - signed char kind) { +void __asan_report_memory_origin(const unsigned char *addr, int size, + signed char kind) { switch (kind) { case kAsanStackOverrun: case kAsanGlobalOverrun: @@ -837,7 +750,8 @@ static void __asan_report_memory_origin(const unsigned char *addr, int size, if (__executable_start <= addr && addr < _end) { __asan_report_memory_origin_image((intptr_t)addr, size); } else if (IsAutoFrame((intptr_t)addr >> 16)) { - __asan_report_memory_origin_heap(addr, size); + if (_weaken(__asan_report_memory_origin_heap)) + _weaken(__asan_report_memory_origin_heap)(addr, size); } } @@ -972,49 +886,7 @@ static __wur __asan_die_f *__asan_report_memory_fault(void *addr, int size, __asan_fault(SHADOW(addr), -128).kind); } -static void *__asan_morgue_add(void *p) { - return atomic_exchange_explicit( - __asan_morgue.p + (atomic_fetch_add_explicit(&__asan_morgue.i, 1, - memory_order_acq_rel) & - (ARRAYLEN(__asan_morgue.p) - 1)), - p, memory_order_acq_rel); -} - -__attribute__((__destructor__)) static void __asan_morgue_flush(void) { - unsigned i; - for (i = 0; i < ARRAYLEN(__asan_morgue.p); ++i) { - if (atomic_load_explicit(__asan_morgue.p + i, memory_order_acquire)) { - _weaken(dlfree)(atomic_exchange_explicit(__asan_morgue.p + i, 0, - memory_order_release)); - } - } -} - -static size_t __asan_heap_size(size_t n) { - if (n < 0x7fffffff0000) { - n = ROUNDUP(n, _Alignof(struct AsanExtra)); - return __asan_roundup2pow(n + sizeof(struct AsanExtra)); - } else { - return -1; - } -} - -static void __asan_write48(uint64_t *value, uint64_t x) { - uint64_t cookie; - cookie = 'J' | 'T' << 8; - cookie ^= x & 0xffff; - *value = (x & 0xffffffffffff) | cookie << 48; -} - -static bool __asan_read48(uint64_t value, uint64_t *x) { - uint64_t cookie; - cookie = value >> 48; - cookie ^= value & 0xffff; - *x = (int64_t)(value << 16) >> 16; - return cookie == ('J' | 'T' << 8); -} - -static void __asan_rawtrace(struct AsanTrace *bt, const struct StackFrame *bp) { +void __asan_rawtrace(struct AsanTrace *bt, const struct StackFrame *bp) { size_t i; for (i = 0; bp && i < ARRAYLEN(bt->p); ++i, bp = bp->next) { if (kisdangerous(bp)) @@ -1058,234 +930,6 @@ static void __asan_trace(struct AsanTrace *bt, const struct StackFrame *bp) { } } -#define __asan_trace __asan_rawtrace - -static void *__asan_allocate(size_t a, size_t n, struct AsanTrace *bt, - int underrun, int overrun, int initializer) { - char *p; - size_t c; - struct AsanExtra *e; - if ((p = _weaken(dlmemalign)(a, __asan_heap_size(n)))) { - c = _weaken(dlmalloc_usable_size)(p); - e = (struct AsanExtra *)(p + c - sizeof(*e)); - __asan_unpoison(p, n); - __asan_poison(p - 16, 16, underrun); /* see dlmalloc design */ - __asan_poison(p + n, c - n, overrun); - __asan_memset(p, initializer, n); - __asan_write48(&e->size, n); - __asan_memcpy(&e->bt, bt, sizeof(*bt)); - } - return p; -} - -static void *__asan_allocate_heap(size_t a, size_t n, struct AsanTrace *bt) { - return __asan_allocate(a, n, bt, kAsanHeapUnderrun, kAsanHeapOverrun, 0xf9); -} - -static struct AsanExtra *__asan_get_extra(const void *p, size_t *c) { - int f; - long x, n; - struct AsanExtra *e; - f = (intptr_t)p >> 16; - if (!kisdangerous(p) && - (n = _weaken(dlmalloc_usable_size)((void *)p)) > sizeof(*e) && - !ckd_add(&x, (intptr_t)p, n) && x <= 0x800000000000 && - (LIKELY(f == (int)((x - 1) >> 16)) || !kisdangerous((void *)(x - 1))) && - (LIKELY(f == (int)((x = x - sizeof(*e)) >> 16)) || - __asan_is_mapped(x >> 16)) && - !(x & (_Alignof(struct AsanExtra) - 1))) { - *c = n; - return (struct AsanExtra *)x; - } else { - return 0; - } -} - -size_t __asan_get_heap_size(const void *p) { - size_t n, c; - struct AsanExtra *e; - if ((e = __asan_get_extra(p, &c)) && __asan_read48(e->size, &n)) { - return n; - } - return 0; -} - -static size_t __asan_malloc_usable_size(void *p) { - size_t n, c; - struct AsanExtra *e; - if ((e = __asan_get_extra(p, &c)) && __asan_read48(e->size, &n)) { - return n; - } - __asan_report_invalid_pointer(p)(); - __asan_unreachable(); -} - -int __asan_print_trace(void *p) { - size_t c, i, n; - struct AsanExtra *e; - if (!(e = __asan_get_extra(p, &c))) { - kprintf(" bad pointer"); - return einval(); - } - if (!__asan_read48(e->size, &n)) { - kprintf(" bad cookie"); - return -1; - } - kprintf("\n%p %,lu bytes [asan]", (char *)p, n); - if (!__asan_is_mapped((((intptr_t)p >> 3) + 0x7fff8000) >> 16)) { - kprintf(" (shadow not mapped?!)"); - } - for (i = 0; i < ARRAYLEN(e->bt.p) && e->bt.p[i]; ++i) { - kprintf("\n%*lx %t", 12, e->bt.p[i], e->bt.p[i]); - } - return 0; -} - -// Returns true if `p` was allocated by an IGNORE_LEAKS(function). -int __asan_is_leaky(void *p) { - int sym; - size_t c, i, n; - intptr_t f, *l; - struct AsanExtra *e; - struct SymbolTable *st; - if (!_weaken(GetSymbolTable)) - notpossible; - if (!(e = __asan_get_extra(p, &c))) - return 0; - if (!__asan_read48(e->size, &n)) - return 0; - if (!__asan_is_mapped((((intptr_t)p >> 3) + 0x7fff8000) >> 16)) - return 0; - if (!(st = GetSymbolTable())) - return 0; - for (i = 0; i < ARRAYLEN(e->bt.p) && e->bt.p[i]; ++i) { - if ((sym = _weaken(__get_symbol)(st, e->bt.p[i])) == -1) - continue; - f = st->addr_base + st->symbols[sym].x; - for (l = _leaky_start; l < _leaky_end; ++l) { - if (f == *l) { - return 1; - } - } - } - return 0; -} - -static void __asan_deallocate(char *p, long kind) { - size_t c, n; - struct AsanExtra *e; - if ((e = __asan_get_extra(p, &c))) { - if (__asan_read48(e->size, &n)) { - __asan_poison(p, c, kind); - if (c <= ASAN_MORGUE_THRESHOLD) { - p = __asan_morgue_add(p); - } - _weaken(dlfree)(p); - } else { - __asan_report_invalid_pointer(p)(); - __asan_unreachable(); - } - } else { - __asan_report_invalid_pointer(p)(); - __asan_unreachable(); - } -} - -void __asan_free(void *p) { - if (!p) - return; - __asan_deallocate(p, kAsanHeapFree); -} - -size_t __asan_bulk_free(void *p[], size_t n) { - size_t i; - for (i = 0; i < n; ++i) { - if (p[i]) { - __asan_deallocate(p[i], kAsanHeapFree); - p[i] = 0; - } - } - return 0; -} - -static void *__asan_realloc_nogrow(void *p, size_t n, size_t m, - struct AsanTrace *bt) { - return 0; -} - -static void *__asan_realloc_grow(void *p, size_t n, size_t m, - struct AsanTrace *bt) { - char *q; - if ((q = __asan_allocate_heap(16, n, bt))) { - __asan_memcpy(q, p, m); - __asan_deallocate(p, kAsanHeapRelocated); - } - return q; -} - -static void *__asan_realloc_impl(void *p, size_t n, - void *grow(void *, size_t, size_t, - struct AsanTrace *)) { - size_t c, m; - struct AsanExtra *e; - if ((e = __asan_get_extra(p, &c))) { - if (__asan_read48(e->size, &m)) { - if (n <= m) { // shrink - __asan_poison((char *)p + n, m - n, kAsanHeapOverrun); - __asan_write48(&e->size, n); - return p; - } else if (n <= c - sizeof(struct AsanExtra)) { // small growth - __asan_unpoison((char *)p + m, n - m); - __asan_write48(&e->size, n); - return p; - } else { // exponential growth - return grow(p, n, m, &e->bt); - } - } - } - __asan_report_invalid_pointer(p)(); - __asan_unreachable(); -} - -void *__asan_malloc(size_t size) { - struct AsanTrace bt; - __asan_trace(&bt, RBP); - return __asan_allocate_heap(16, size, &bt); -} - -void *__asan_memalign(size_t align, size_t size) { - struct AsanTrace bt; - __asan_trace(&bt, RBP); - return __asan_allocate_heap(align, size, &bt); -} - -void *__asan_calloc(size_t n, size_t m) { - struct AsanTrace bt; - __asan_trace(&bt, RBP); - if (ckd_mul(&n, n, m)) - n = -1; - return __asan_allocate(16, n, &bt, kAsanHeapUnderrun, kAsanHeapOverrun, 0x00); -} - -void *__asan_realloc(void *p, size_t n) { - struct AsanTrace bt; - if (p) { - return __asan_realloc_impl(p, n, __asan_realloc_grow); - } else { - __asan_trace(&bt, RBP); - return __asan_allocate_heap(16, n, &bt); - } -} - -void *__asan_realloc_in_place(void *p, size_t n) { - return p ? __asan_realloc_impl(p, n, __asan_realloc_nogrow) : 0; -} - -int __asan_malloc_trim(size_t pad) { - __asan_morgue_flush(); - return _weaken(dlmalloc_trim) ? _weaken(dlmalloc_trim)(pad) : 0; -} - void __asan_register_globals(struct AsanGlobal g[], int n) { int i; __asan_poison(g, sizeof(*g) * n, kAsanProtected); @@ -1328,19 +972,6 @@ void __asan_report_store(uint8_t *addr, int size) { __asan_unreachable(); } -void *__asan_stack_malloc(size_t size, int classid) { - struct AsanTrace bt; - ASAN_LOG("__asan_stack_malloc(%zu, %d)\n", size, classid); - __asan_trace(&bt, RBP); - return __asan_allocate(16, size, &bt, kAsanStackUnderrun, kAsanStackOverrun, - 0xf9); -} - -void __asan_stack_free(char *p, size_t size, int classid) { - ASAN_LOG("__asan_stack_free(%p, %zu, %d)\n", p, size, classid); - __asan_deallocate(p, kAsanStackFree); -} - void __asan_handle_no_return(void) { // this check is stupid and has far-reaching toilsome ramifications } @@ -1402,18 +1033,6 @@ void __asan_after_dynamic_init(void) { ASAN_LOG("__asan_after_dynamic_init()\n"); } -void __asan_install_malloc_hooks(void) { - HOOK(hook_free, __asan_free); - HOOK(hook_malloc, __asan_malloc); - HOOK(hook_calloc, __asan_calloc); - HOOK(hook_realloc, __asan_realloc); - HOOK(hook_memalign, __asan_memalign); - HOOK(hook_bulk_free, __asan_bulk_free); - HOOK(hook_malloc_trim, __asan_malloc_trim); - HOOK(hook_realloc_in_place, __asan_realloc_in_place); - HOOK(hook_malloc_usable_size, __asan_malloc_usable_size); -} - void __asan_map_shadow(uintptr_t p, size_t n) { // assume _mmi.lock is held void *addr; @@ -1496,13 +1115,6 @@ void __asan_init(int argc, char **argv, char **envp, unsigned long *auxv) { static bool once; if (!_cmpxchg(&once, false, true)) return; - if (_weaken(hook_malloc) || _weaken(hook_calloc) || _weaken(hook_realloc) || - _weaken(hook_realloc_in_place) || _weaken(hook_free) || - _weaken(hook_malloc_usable_size)) { - REQUIRE(dlfree); - REQUIRE(dlmemalign); - REQUIRE(dlmalloc_usable_size); - } __asan_shadow_existing_mappings(); __asan_map_shadow((uintptr_t)__executable_start, _end - __executable_start); __asan_map_shadow(0, 4096); @@ -1510,7 +1122,6 @@ void __asan_init(int argc, char **argv, char **envp, unsigned long *auxv) { if (!IsWindows()) { sys_mprotect((void *)0x7fff8000, 0x10000, PROT_READ); } - __asan_install_malloc_hooks(); STRACE(" _ ____ _ _ _ "); STRACE(" / \\ / ___| / \\ | \\ | |"); STRACE(" / _ \\ \\___ \\ / _ \\ | \\| |"); @@ -1522,4 +1133,5 @@ void __asan_init(int argc, char **argv, char **envp, unsigned long *auxv) { __weak_reference(__asan_poison, __asan_poison_memory_region); __weak_reference(__asan_unpoison, __asan_unpoison_memory_region); +#endif /* __SANITIZE_ADDRESS__ */ #endif /* __x86_64__ */ diff --git a/libc/intrin/asan.internal.h b/libc/intrin/asan.internal.h index a146b8999..d2fea114d 100644 --- a/libc/intrin/asan.internal.h +++ b/libc/intrin/asan.internal.h @@ -3,10 +3,33 @@ #include "libc/calls/struct/iovec.h" #include "libc/intrin/asancodes.h" #include "libc/macros.internal.h" +#include "libc/nexgen32e/stackframe.h" COSMOPOLITAN_C_START_ +#ifdef MODE_DBG +// MODE=dbg +// O(32mb) of morgue memory +// Θ(64) bytes of malloc overhead +#define ASAN_MORGUE_ITEMS 512 +#define ASAN_MORGUE_THRESHOLD 65536 +#define ASAN_TRACE_ITEMS 16 +#else +// MODE=asan +// O(32mb) of morgue memory +// Θ(32) bytes of malloc overhead +#define ASAN_MORGUE_ITEMS 512 +#define ASAN_MORGUE_THRESHOLD 65536 +#define ASAN_TRACE_ITEMS 4 +#endif + #define SHADOW(x) ((signed char *)(((intptr_t)(x) >> kAsanScale) + kAsanMagic)) -#define UNSHADOW(x) ((void *)(MAX(0, (intptr_t)(x)-kAsanMagic) << kAsanScale)) +#define UNSHADOW(x) ((void *)(MAX(0, (intptr_t)(x) - kAsanMagic) << kAsanScale)) + +#define __asan_unreachable() \ + do { \ + kprintf("%s:%d: __asan_unreachable()\n", __FILE__, __LINE__); \ + __builtin_trap(); \ + } while (0) typedef void __asan_die_f(void); @@ -15,6 +38,10 @@ struct AsanFault { const signed char *shadow; }; +struct AsanTrace { + uint32_t p[ASAN_TRACE_ITEMS]; // assumes linkage into 32-bit space +}; + void __asan_unpoison(void *, long); void __asan_poison(void *, long, signed char); void __asan_verify(const void *, size_t); @@ -27,19 +54,17 @@ bool __asan_is_valid_iov(const struct iovec *, int) nosideeffect; struct AsanFault __asan_check(const void *, long) nosideeffect; struct AsanFault __asan_check_str(const char *) nosideeffect; -void __asan_free(void *); -void *__asan_malloc(size_t); +bool __asan_is_mapped(int); int __asan_is_leaky(void *); -int __asan_malloc_trim(size_t); int __asan_print_trace(void *); -void *__asan_calloc(size_t, size_t); -void *__asan_realloc(void *, size_t); -void *__asan_memalign(size_t, size_t); -size_t __asan_get_heap_size(const void *); -void *__asan_realloc_in_place(void *, size_t); - +__asan_die_f *__asan_die(void) __wur; void __asan_memset(void *, char, size_t); +size_t __asan_get_heap_size(const void *); void *__asan_memcpy(void *, const void *, size_t); +void __asan_rawtrace(struct AsanTrace *, const struct StackFrame *); +void __asan_report_memory_origin(const unsigned char *, int, signed char); +void __asan_report_memory_origin_image(intptr_t, int); +void __asan_report_memory_origin_heap(const unsigned char *, int); COSMOPOLITAN_C_END_ #endif /* COSMOPOLITAN_LIBC_INTRIN_ASAN_H_ */ diff --git a/libc/intrin/asaninit.S b/libc/intrin/asaninit.S index 6b05a74ea..aaa893964 100644 --- a/libc/intrin/asaninit.S +++ b/libc/intrin/asaninit.S @@ -25,6 +25,7 @@ mov %r13,%rsi mov %r14,%rdx mov %r15,%rcx + .weak __asan_init call __asan_init pop %rsi pop %rdi diff --git a/libc/intrin/kprintf.greg.c b/libc/intrin/kprintf.greg.c index 031989882..d1fa8292c 100644 --- a/libc/intrin/kprintf.greg.c +++ b/libc/intrin/kprintf.greg.c @@ -816,7 +816,7 @@ privileged static size_t kformat(char *b, size_t n, const char *fmt, goto FormatString; } base = 4; - hash = '&'; + /* hash = '&'; */ goto FormatNumber; } diff --git a/libc/tinymath/magicu.c b/libc/intrin/magicu.c similarity index 99% rename from libc/tinymath/magicu.c rename to libc/intrin/magicu.c index c03d8bb87..6f27bd07a 100644 --- a/libc/tinymath/magicu.c +++ b/libc/intrin/magicu.c @@ -16,7 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/magicu.h" +#include "libc/intrin/magicu.h" #include "libc/assert.h" /** diff --git a/libc/tinymath/magicu.h b/libc/intrin/magicu.h similarity index 100% rename from libc/tinymath/magicu.h rename to libc/intrin/magicu.h diff --git a/libc/irq/BUILD.mk b/libc/irq/BUILD.mk index a3fb241a3..83b3e6d97 100644 --- a/libc/irq/BUILD.mk +++ b/libc/irq/BUILD.mk @@ -43,6 +43,8 @@ $(LIBC_IRQ_A).pkg: \ $(LIBC_IRQ_A_OBJS) \ $(foreach x,$(LIBC_IRQ_A_DIRECTDEPS),$($(x)_A).pkg) +$(LIBC_IRQ_A_OBJS): private COPTS += -fno-sanitize=address + LIBC_IRQ_LIBS = $(foreach x,$(LIBC_IRQ_ARTIFACTS),$($(x))) LIBC_IRQ_SRCS = $(foreach x,$(LIBC_IRQ_ARTIFACTS),$($(x)_SRCS)) LIBC_IRQ_HDRS = $(foreach x,$(LIBC_IRQ_ARTIFACTS),$($(x)_HDRS)) diff --git a/libc/isystem/cosmo.h b/libc/isystem/cosmo.h index bf875e7b3..4bda8eb58 100644 --- a/libc/isystem/cosmo.h +++ b/libc/isystem/cosmo.h @@ -43,7 +43,6 @@ #include "libc/intrin/weaken.h" #include "libc/mem/critbit0.h" #include "libc/mem/gc.h" -#include "libc/mem/hook.internal.h" #include "libc/nexgen32e/rdtsc.h" #include "libc/nexgen32e/stackframe.h" #include "libc/nexgen32e/x86feature.h" diff --git a/libc/log/memlog.c b/libc/log/memlog.c deleted file mode 100644 index b6ea93df8..000000000 --- a/libc/log/memlog.c +++ /dev/null @@ -1,299 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/assert.h" -#include "libc/atomic.h" -#include "libc/intrin/atomic.h" -#include "libc/intrin/kprintf.h" -#include "libc/log/backtrace.internal.h" -#include "libc/log/log.h" -#include "libc/macros.internal.h" -#include "libc/mem/hook.internal.h" -#include "libc/mem/mem.h" -#include "libc/runtime/symbols.internal.h" -#include "libc/sysv/consts/o.h" -#include "libc/thread/thread.h" -#include "third_party/dlmalloc/dlmalloc.h" - -/** - * @fileoverview Malloc Logging - * - * If you put the following in your main file: - * - * __static_yoink("enable_memory_log"); - * - * Then memory allocations with constant backtraces will be logged to - * standard error. The columns printed are - * - * MEM TID OP USAGE PTR OLD SIZE CALLER1 CALLER2 CALLER3 CALLER4 - * - * delimited by spaces. For example, to see peak malloc usage: - * - * ./myprog 2>log - * grep ^MEM log | sort -nk4 | tail -n10 - * - * To see the largest allocations: - * - * ./myprog 2>log - * grep ^MEM log | grep -v free | sort -nk7 | tail -n10 - */ - -static struct Memlog { - void (*free)(void *); - void *(*malloc)(size_t); - void *(*calloc)(size_t, size_t); - void *(*memalign)(size_t, size_t); - void *(*realloc)(void *, size_t); - void *(*realloc_in_place)(void *, size_t); - size_t (*bulk_free)(void *[], size_t); - struct Allocs { - long i, n, f; - struct Alloc { - void *addr; - long size; - } *p; - } allocs; - atomic_long usage; -} __memlog; - -static pthread_mutex_t __memlog_lock_obj; - -static void __memlog_lock(void) { - pthread_mutex_lock(&__memlog_lock_obj); -} - -static void __memlog_unlock(void) { - pthread_mutex_unlock(&__memlog_lock_obj); -} - -static long __memlog_size(void *p) { - return malloc_usable_size(p) + 16; -} - -static void __memlog_backtrace(struct StackFrame *frame, intptr_t *a, - intptr_t *b, intptr_t *c, intptr_t *d) { - *a = *b = *c = *d = 0; - if (!frame) - return; - *a = frame->addr; - if (!(frame = frame->next)) - return; - *b = frame->addr; - if (!(frame = frame->next)) - return; - *c = frame->addr; - if (!(frame = frame->next)) - return; - *d = frame->addr; -} - -static long __memlog_find(void *p) { - long i; - for (i = 0; i < __memlog.allocs.i; ++i) { - if (__memlog.allocs.p[i].addr == p) { - return i; - } - } - return -1; -} - -static void __memlog_insert(void *p) { - long i, n, n2; - struct Alloc *p2; - n = __memlog_size(p); - for (i = __memlog.allocs.f; i < __memlog.allocs.i; ++i) { - if (!__memlog.allocs.p[i].addr) { - __memlog.allocs.p[i].addr = p; - __memlog.allocs.p[i].size = n; - __memlog.usage += n; - return; - } - } - if (i == __memlog.allocs.n) { - p2 = __memlog.allocs.p; - n2 = __memlog.allocs.n; - n2 += 1; - n2 += n2 >> 1; - if ((p2 = dlrealloc(p2, n2 * sizeof(*p2)))) { - __memlog.allocs.p = p2; - __memlog.allocs.n = n2; - } else { - return; - } - } - __memlog.allocs.p[i].addr = p; - __memlog.allocs.p[i].size = n; - __memlog.allocs.i++; - __memlog.usage += n; -} - -static void __memlog_update(void *p2, void *p) { - long i, n; - n = __memlog_size(p2); - for (i = 0; i < __memlog.allocs.i; ++i) { - if (__memlog.allocs.p[i].addr == p) { - __memlog.usage += n - __memlog.allocs.p[i].size; - __memlog.allocs.p[i].addr = p2; - __memlog.allocs.p[i].size = n; - unassert(__memlog.usage >= 0); - return; - } - } - __builtin_unreachable(); -} - -static void __memlog_log(struct StackFrame *frame, const char *op, void *res, - void *old, size_t n) { - intptr_t a, b, c, d; - __memlog_backtrace(frame, &a, &b, &c, &d); - kprintf("MEM %6P %7s %12ld %14p %14p %8zu %t %t %t %t\n", op, - atomic_load(&__memlog.usage), res, old, n, a, b, c, d); -} - -static void __memlog_free(void *p) { - long i, n; - if (!p) - return; - __memlog_lock(); - if ((i = __memlog_find(p)) != -1) { - n = __memlog.allocs.p[i].size; - __memlog.allocs.p[i].addr = 0; - __memlog.usage -= __memlog.allocs.p[i].size; - __memlog.allocs.f = MIN(__memlog.allocs.f, i); - unassert(__memlog.usage >= 0); - } else { - kprintf("memlog could not find %p\n", p); - notpossible; - } - __memlog_unlock(); - unassert(__memlog.free); - __memlog.free(p); - __memlog_log(__builtin_frame_address(0), "free", 0, p, n); -} - -static void *__memlog_malloc(size_t n) { - void *res; - unassert(__memlog.malloc); - if ((res = __memlog.malloc(n))) { - __memlog_lock(); - __memlog_insert(res); - __memlog_unlock(); - __memlog_log(__builtin_frame_address(0), "malloc", res, 0, n); - } - return res; -} - -static void *__memlog_calloc(size_t n, size_t z) { - void *res; - unassert(__memlog.calloc); - if ((res = __memlog.calloc(n, z))) { - __memlog_lock(); - __memlog_insert(res); - __memlog_unlock(); - __memlog_log(__builtin_frame_address(0), "malloc", res, 0, n * z); - } - return res; -} - -static void *__memlog_memalign(size_t l, size_t n) { - void *res; - unassert(__memlog.memalign); - if ((res = __memlog.memalign(l, n))) { - __memlog_lock(); - __memlog_insert(res); - __memlog_unlock(); - __memlog_log(__builtin_frame_address(0), "malloc", res, 0, n); - } - return res; -} - -static void *__memlog_realloc_impl(void *p, size_t n, - void *(*f)(void *, size_t), - struct StackFrame *frame) { - void *res; - unassert(f); - if ((res = f(p, n))) { - __memlog_lock(); - if (p) { - __memlog_update(res, p); - } else { - __memlog_insert(res); - } - __memlog_unlock(); - __memlog_log(frame, "realloc", res, p, n); - } - return res; -} - -static void *__memlog_realloc(void *p, size_t n) { - return __memlog_realloc_impl(p, n, __memlog.realloc, - __builtin_frame_address(0)); -} - -static void *__memlog_realloc_in_place(void *p, size_t n) { - return __memlog_realloc_impl(p, n, __memlog.realloc_in_place, - __builtin_frame_address(0)); -} - -static size_t __memlog_bulk_free(void *p[], size_t n) { - size_t i; - for (i = 0; i < n; ++i) { - __memlog_free(p[i]); - p[i] = 0; - } - return 0; -} - -static textexit void __memlog_destroy(void) { - __memlog_lock(); - hook_free = __memlog.free; - hook_malloc = __memlog.malloc; - hook_calloc = __memlog.calloc; - hook_realloc = __memlog.realloc; - hook_memalign = __memlog.memalign; - hook_bulk_free = __memlog.bulk_free; - hook_realloc_in_place = __memlog.realloc_in_place; - dlfree(__memlog.allocs.p); - __memlog.allocs.p = 0; - __memlog.allocs.i = 0; - __memlog.allocs.n = 0; - __memlog_unlock(); -} - -__attribute__((__constructor__(90))) // -static textstartup void -__memlog_init(void) { - GetSymbolTable(); - __memlog_lock(); - __memlog.free = hook_free; - hook_free = __memlog_free; - __memlog.malloc = hook_malloc; - hook_malloc = __memlog_malloc; - __memlog.calloc = hook_calloc; - hook_calloc = __memlog_calloc; - __memlog.realloc = hook_realloc; - hook_realloc = __memlog_realloc; - __memlog.memalign = hook_memalign; - hook_memalign = __memlog_memalign; - __memlog.bulk_free = hook_bulk_free; - hook_bulk_free = __memlog_bulk_free; - __memlog.realloc_in_place = hook_realloc_in_place; - hook_realloc_in_place = __memlog_realloc_in_place; - atexit(__memlog_destroy); - __memlog_unlock(); -} diff --git a/libc/mem/BUILD.mk b/libc/mem/BUILD.mk index bc9ab42cb..3f496846d 100644 --- a/libc/mem/BUILD.mk +++ b/libc/mem/BUILD.mk @@ -44,6 +44,26 @@ $(LIBC_MEM_A_OBJS): private \ -Wframe-larger-than=4096 \ -Walloca-larger-than=4096 +o/$(MODE)/libc/intrin/asan.o: private \ + CFLAGS += \ + -O2 \ + -finline \ + -finline-functions \ + -x-no-pg \ + -ffreestanding \ + -fno-sanitize=all \ + -fno-stack-protector \ + -Wframe-larger-than=4096 \ + -Walloca-larger-than=4096 \ + -fpatchable-function-entry=0,0 + +# make asan stack traces shorter +o/$(MODE)/libc/intrin/asanthunk.o: private \ + CFLAGS += \ + -Os \ + $(NO_MAGIC) \ + -foptimize-sibling-calls + LIBC_MEM_LIBS = $(foreach x,$(LIBC_MEM_ARTIFACTS),$($(x))) LIBC_MEM_SRCS = $(foreach x,$(LIBC_MEM_ARTIFACTS),$($(x)_SRCS)) LIBC_MEM_HDRS = $(foreach x,$(LIBC_MEM_ARTIFACTS),$($(x)_HDRS)) diff --git a/libc/mem/asan.c b/libc/mem/asan.c new file mode 100644 index 000000000..57c51287d --- /dev/null +++ b/libc/mem/asan.c @@ -0,0 +1,361 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/intrin/atomic.h" +#include "libc/intrin/kprintf.h" +#include "libc/intrin/leaky.internal.h" +#include "libc/intrin/likely.h" +#include "libc/intrin/weaken.h" +#include "libc/macros.internal.h" +#include "libc/mem/mem.h" +#include "libc/runtime/symbols.internal.h" +#include "libc/stdckdint.h" +#include "libc/sysv/errfuns.h" +#include "libc/thread/thread.h" +#include "third_party/dlmalloc/dlmalloc.h" +#ifdef __SANITIZE_ADDRESS__ + +#define RBP __builtin_frame_address(0) + +struct AsanExtra { + uint64_t size; + struct AsanTrace bt; +}; + +struct ReportOriginHeap { + const unsigned char *a; + int z; +}; + +static struct AsanMorgue { + _Atomic(unsigned) i; + _Atomic(void *) p[ASAN_MORGUE_ITEMS]; +} __asan_morgue; + +static pthread_spinlock_t __asan_lock; + +static int __asan_bsr(uint64_t x) { + _Static_assert(sizeof(long long) == sizeof(uint64_t), ""); + return __builtin_clzll(x) ^ 63; +} + +static uint64_t __asan_roundup2pow(uint64_t x) { + return 2ull << __asan_bsr(x - 1); +} + +static void __asan_write48(uint64_t *value, uint64_t x) { + uint64_t cookie; + cookie = 'J' | 'T' << 8; + cookie ^= x & 0xffff; + *value = (x & 0xffffffffffff) | cookie << 48; +} + +static bool __asan_read48(uint64_t value, uint64_t *x) { + uint64_t cookie; + cookie = value >> 48; + cookie ^= value & 0xffff; + *x = (int64_t)(value << 16) >> 16; + return cookie == ('J' | 'T' << 8); +} + +static void *__asan_morgue_add(void *p) { + return atomic_exchange_explicit( + __asan_morgue.p + (atomic_fetch_add_explicit(&__asan_morgue.i, 1, + memory_order_acq_rel) & + (ARRAYLEN(__asan_morgue.p) - 1)), + p, memory_order_acq_rel); +} + +__attribute__((__destructor__)) static void __asan_morgue_flush(void) { + unsigned i; + for (i = 0; i < ARRAYLEN(__asan_morgue.p); ++i) + if (atomic_load_explicit(__asan_morgue.p + i, memory_order_acquire)) + dlfree(atomic_exchange_explicit(__asan_morgue.p + i, 0, + memory_order_release)); +} + +static size_t __asan_heap_size(size_t n) { + if (n < 0x7fffffff0000) { + n = ROUNDUP(n, _Alignof(struct AsanExtra)); + return __asan_roundup2pow(n + sizeof(struct AsanExtra)); + } else { + return -1; + } +} + +static void *__asan_allocate(size_t a, size_t n, struct AsanTrace *bt, + int underrun, int overrun, int initializer) { + char *p; + size_t c; + struct AsanExtra *e; + if ((p = dlmemalign(a, __asan_heap_size(n)))) { + c = dlmalloc_usable_size(p); + e = (struct AsanExtra *)(p + c - sizeof(*e)); + __asan_unpoison(p, n); + __asan_poison(p - 16, 16, underrun); /* see dlmalloc design */ + __asan_poison(p + n, c - n, overrun); + __asan_memset(p, initializer, n); + __asan_write48(&e->size, n); + __asan_memcpy(&e->bt, bt, sizeof(*bt)); + } + return p; +} + +static void *__asan_allocate_heap(size_t a, size_t n, struct AsanTrace *bt) { + return __asan_allocate(a, n, bt, kAsanHeapUnderrun, kAsanHeapOverrun, 0xf9); +} + +static struct AsanExtra *__asan_get_extra(const void *p, size_t *c) { + int f; + long x, n; + struct AsanExtra *e; + f = (intptr_t)p >> 16; + if (!kisdangerous(p) && (n = dlmalloc_usable_size((void *)p)) > sizeof(*e) && + !ckd_add(&x, (intptr_t)p, n) && x <= 0x800000000000 && + (LIKELY(f == (int)((x - 1) >> 16)) || !kisdangerous((void *)(x - 1))) && + (LIKELY(f == (int)((x = x - sizeof(*e)) >> 16)) || + __asan_is_mapped(x >> 16)) && + !(x & (_Alignof(struct AsanExtra) - 1))) { + *c = n; + return (struct AsanExtra *)x; + } else { + return 0; + } +} + +// Returns true if `p` was allocated by an IGNORE_LEAKS(function). +int __asan_is_leaky(void *p) { + int sym; + size_t c, i, n; + intptr_t f, *l; + struct AsanExtra *e; + struct SymbolTable *st; + if (!_weaken(GetSymbolTable)) + notpossible; + if (!(e = __asan_get_extra(p, &c))) + return 0; + if (!__asan_read48(e->size, &n)) + return 0; + if (!__asan_is_mapped((((intptr_t)p >> 3) + 0x7fff8000) >> 16)) + return 0; + if (!(st = GetSymbolTable())) + return 0; + for (i = 0; i < ARRAYLEN(e->bt.p) && e->bt.p[i]; ++i) { + if ((sym = _weaken(__get_symbol)(st, e->bt.p[i])) == -1) + continue; + f = st->addr_base + st->symbols[sym].x; + for (l = _leaky_start; l < _leaky_end; ++l) + if (f == *l) + return 1; + } + return 0; +} + +#define __asan_trace __asan_rawtrace + +int __asan_print_trace(void *p) { + size_t c, i, n; + struct AsanExtra *e; + if (!(e = __asan_get_extra(p, &c))) { + kprintf(" bad pointer"); + return einval(); + } + if (!__asan_read48(e->size, &n)) { + kprintf(" bad cookie"); + return -1; + } + kprintf("\n%p %,lu bytes [asan]", (char *)p, n); + if (!__asan_is_mapped((((intptr_t)p >> 3) + 0x7fff8000) >> 16)) + kprintf(" (shadow not mapped?!)"); + for (i = 0; i < ARRAYLEN(e->bt.p) && e->bt.p[i]; ++i) + kprintf("\n%*lx %t", 12, e->bt.p[i], e->bt.p[i]); + return 0; +} + +static void __asan_onmemory(void *x, void *y, size_t n, void *a) { + const unsigned char *p = x; + struct ReportOriginHeap *t = a; + if ((p <= t->a && t->a < p + n) || + (p <= t->a + t->z && t->a + t->z < p + n) || + (t->a < p && p + n <= t->a + t->z)) { + kprintf("%p %,lu bytes [dlmalloc]", x, n); + __asan_print_trace(x); + kprintf("\n"); + } +} + +void __asan_report_memory_origin_heap(const unsigned char *a, int z) { + struct ReportOriginHeap t; + kprintf("\nthe memory was allocated by\n"); + t.a = a; + t.z = z; + dlmalloc_inspect_all(__asan_onmemory, &t); +} + +size_t __asan_get_heap_size(const void *p) { + size_t n, c; + struct AsanExtra *e; + if ((e = __asan_get_extra(p, &c)) && __asan_read48(e->size, &n)) + return n; + return 0; +} + +static __wur __asan_die_f *__asan_report_invalid_pointer(const void *addr) { + pthread_spin_lock(&__asan_lock); + kprintf("\n\e[J\e[1;31masan error\e[0m: this corruption at %p shadow %p\n", + addr, SHADOW(addr)); + return __asan_die(); +} + +size_t malloc_usable_size(void *p) { + size_t n, c; + struct AsanExtra *e; + if ((e = __asan_get_extra(p, &c)) && __asan_read48(e->size, &n)) + return n; + __asan_report_invalid_pointer(p)(); + __asan_unreachable(); +} + +static void __asan_deallocate(char *p, long kind) { + size_t c, n; + struct AsanExtra *e; + if ((e = __asan_get_extra(p, &c))) { + if (__asan_read48(e->size, &n)) { + __asan_poison(p, c, kind); + if (c <= ASAN_MORGUE_THRESHOLD) + p = __asan_morgue_add(p); + dlfree(p); + } else { + __asan_report_invalid_pointer(p)(); + __asan_unreachable(); + } + } else { + __asan_report_invalid_pointer(p)(); + __asan_unreachable(); + } +} + +static void *__asan_realloc_nogrow(void *p, size_t n, size_t m, + struct AsanTrace *bt) { + return 0; +} + +static void *__asan_realloc_grow(void *p, size_t n, size_t m, + struct AsanTrace *bt) { + char *q; + if ((q = __asan_allocate_heap(16, n, bt))) { + __asan_memcpy(q, p, m); + __asan_deallocate(p, kAsanHeapRelocated); + } + return q; +} + +static void *__asan_realloc_impl(void *p, size_t n, + void *grow(void *, size_t, size_t, + struct AsanTrace *)) { + size_t c, m; + struct AsanExtra *e; + if ((e = __asan_get_extra(p, &c))) { + if (__asan_read48(e->size, &m)) { + if (n <= m) { // shrink + __asan_poison((char *)p + n, m - n, kAsanHeapOverrun); + __asan_write48(&e->size, n); + return p; + } else if (n <= c - sizeof(struct AsanExtra)) { // small growth + __asan_unpoison((char *)p + m, n - m); + __asan_write48(&e->size, n); + return p; + } else { // exponential growth + return grow(p, n, m, &e->bt); + } + } + } + __asan_report_invalid_pointer(p)(); + __asan_unreachable(); +} + +void free(void *p) { + if (!p) + return; + __asan_deallocate(p, kAsanHeapFree); +} + +void *malloc(size_t size) { + struct AsanTrace bt; + __asan_trace(&bt, RBP); + return __asan_allocate_heap(16, size, &bt); +} + +void *memalign(size_t align, size_t size) { + struct AsanTrace bt; + __asan_trace(&bt, RBP); + return __asan_allocate_heap(align, size, &bt); +} + +void *calloc(size_t n, size_t m) { + struct AsanTrace bt; + __asan_trace(&bt, RBP); + if (ckd_mul(&n, n, m)) + n = -1; + return __asan_allocate(16, n, &bt, kAsanHeapUnderrun, kAsanHeapOverrun, 0x00); +} + +void *realloc(void *p, size_t n) { + struct AsanTrace bt; + if (p) { + return __asan_realloc_impl(p, n, __asan_realloc_grow); + } else { + __asan_trace(&bt, RBP); + void *res = __asan_allocate_heap(16, n, &bt); + return res; + } +} + +void *realloc_in_place(void *p, size_t n) { + return p ? __asan_realloc_impl(p, n, __asan_realloc_nogrow) : 0; +} + +int malloc_trim(size_t pad) { + __asan_morgue_flush(); + return dlmalloc_trim(pad); +} + +void *__asan_stack_malloc(size_t size, int classid) { + struct AsanTrace bt; + __asan_trace(&bt, RBP); + return __asan_allocate(16, size, &bt, kAsanStackUnderrun, kAsanStackOverrun, + 0xf9); +} + +void __asan_stack_free(char *p, size_t size, int classid) { + __asan_deallocate(p, kAsanStackFree); +} + +size_t bulk_free(void *p[], size_t n) { + size_t i; + for (i = 0; i < n; ++i) + if (p[i]) { + __asan_deallocate(p[i], kAsanHeapFree); + p[i] = 0; + } + return 0; +} + +#endif /* __SANITIZE_ADDRESS__ */ diff --git a/libc/intrin/asanthunk.c b/libc/mem/asanthunk.c similarity index 98% rename from libc/intrin/asanthunk.c rename to libc/mem/asanthunk.c index 3bc8882ca..5abecff00 100644 --- a/libc/intrin/asanthunk.c +++ b/libc/mem/asanthunk.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #ifdef __x86_64__ +#ifdef __SANITIZE_ADDRESS__ void __asan_report_load(void *, int); void __asan_report_store(void *, int); @@ -173,4 +174,5 @@ void __asan_store32() { __builtin_trap(); } +#endif /* __SANITIZE_ADDRESS__ */ #endif /* __x86_64__ */ diff --git a/libc/mem/bulk_free.c b/libc/mem/bulk_free.c index 8468653bb..64c736a50 100644 --- a/libc/mem/bulk_free.c +++ b/libc/mem/bulk_free.c @@ -16,11 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/hook.internal.h" #include "libc/mem/mem.h" #include "third_party/dlmalloc/dlmalloc.h" - -size_t (*hook_bulk_free)(void *[], size_t) = dlbulk_free; +#ifndef __SANITIZE_ADDRESS__ /** * Frees and clears (sets to NULL) each non-null pointer in given array. @@ -32,5 +30,7 @@ size_t (*hook_bulk_free)(void *[], size_t) = dlbulk_free; * to sort this array before calling bulk_free. */ size_t bulk_free(void **p, size_t n) { - return hook_bulk_free(p, n); + return dlbulk_free(p, n); } + +#endif /* __SANITIZE_ADDRESS__ */ diff --git a/libc/mem/calloc.c b/libc/mem/calloc.c index 7d1ffa9d2..f64294fb5 100644 --- a/libc/mem/calloc.c +++ b/libc/mem/calloc.c @@ -16,11 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/hook.internal.h" #include "libc/mem/mem.h" #include "third_party/dlmalloc/dlmalloc.h" - -void *(*hook_calloc)(size_t, size_t) = dlcalloc; +#ifndef __SANITIZE_ADDRESS__ /** * Allocates n * itemsize bytes, initialized to zero. @@ -32,5 +30,7 @@ void *(*hook_calloc)(size_t, size_t) = dlcalloc; * @see dlcalloc() */ void *calloc(size_t n, size_t itemsize) { - return hook_calloc(n, itemsize); + return dlcalloc(n, itemsize); } + +#endif /* __SANITIZE_ADDRESS__ */ diff --git a/libc/mem/free.c b/libc/mem/free.c index 4fff097cb..c7ba3c233 100644 --- a/libc/mem/free.c +++ b/libc/mem/free.c @@ -16,11 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/hook.internal.h" #include "libc/mem/mem.h" #include "third_party/dlmalloc/dlmalloc.h" - -void (*hook_free)(void *) = dlfree; +#ifndef __SANITIZE_ADDRESS__ /** * Free memory returned by malloc() & co. @@ -33,5 +31,7 @@ void (*hook_free)(void *) = dlfree; * @see dlfree() */ void free(void *p) { - hook_free(p); + dlfree(p); } + +#endif /* __SANITIZE_ADDRESS__ */ diff --git a/libc/mem/hook.internal.h b/libc/mem/hook.internal.h deleted file mode 100644 index 037d61ecf..000000000 --- a/libc/mem/hook.internal.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_MEM_HOOK_H_ -#define COSMOPOLITAN_LIBC_MEM_HOOK_H_ -COSMOPOLITAN_C_START_ - -extern void (*hook_free)(void *); -extern void *(*hook_malloc)(size_t); -extern void *(*hook_calloc)(size_t, size_t); -extern void *(*hook_memalign)(size_t, size_t); -extern void *(*hook_realloc)(void *, size_t); -extern void *(*hook_realloc_in_place)(void *, size_t); -extern int (*hook_malloc_trim)(size_t); -extern size_t (*hook_malloc_usable_size)(void *); -extern size_t (*hook_bulk_free)(void *[], size_t); - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_MEM_HOOK_H_ */ diff --git a/libc/mem/malloc.c b/libc/mem/malloc.c index bf47380c6..68629fb35 100644 --- a/libc/mem/malloc.c +++ b/libc/mem/malloc.c @@ -16,11 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/hook.internal.h" #include "libc/mem/mem.h" #include "third_party/dlmalloc/dlmalloc.h" - -void *(*hook_malloc)(size_t) = dlmalloc; +#ifndef __SANITIZE_ADDRESS__ /** * Allocates uninitialized memory. @@ -43,5 +41,7 @@ void *(*hook_malloc)(size_t) = dlmalloc; * @return new memory, or NULL w/ errno */ void *malloc(size_t n) { - return hook_malloc(n); + return dlmalloc(n); } + +#endif /* __SANITIZE_ADDRESS__ */ diff --git a/libc/mem/malloc_inspect_all.c b/libc/mem/malloc_inspect_all.c index 16b03e266..c6e9468ef 100644 --- a/libc/mem/malloc_inspect_all.c +++ b/libc/mem/malloc_inspect_all.c @@ -19,8 +19,8 @@ #include "libc/mem/mem.h" #include "third_party/dlmalloc/dlmalloc.h" -void malloc_inspect_all(void (*handler)(void* start, void* end, - size_t used_bytes, void* callback_arg), +void malloc_inspect_all(void handler(void* start, void* end, size_t used_bytes, + void* callback_arg), void* arg) { dlmalloc_inspect_all(handler, arg); } diff --git a/libc/mem/malloc_trim.c b/libc/mem/malloc_trim.c index 0165a28a8..3c6f3b474 100644 --- a/libc/mem/malloc_trim.c +++ b/libc/mem/malloc_trim.c @@ -16,11 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/hook.internal.h" #include "libc/mem/mem.h" #include "third_party/dlmalloc/dlmalloc.h" - -int (*hook_malloc_trim)(size_t) = dlmalloc_trim; +#ifndef __SANITIZE_ADDRESS__ /** * Releases freed memory back to system. @@ -29,5 +27,7 @@ int (*hook_malloc_trim)(size_t) = dlmalloc_trim; * @return 1 if it actually released any memory, else 0 */ int malloc_trim(size_t n) { - return hook_malloc_trim(n); + return dlmalloc_trim(n); } + +#endif /* __SANITIZE_ADDRESS__ */ diff --git a/libc/mem/malloc_usable_size.c b/libc/mem/malloc_usable_size.c index 2eb1ce555..60b1c738d 100644 --- a/libc/mem/malloc_usable_size.c +++ b/libc/mem/malloc_usable_size.c @@ -16,11 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/hook.internal.h" #include "libc/mem/mem.h" #include "third_party/dlmalloc/dlmalloc.h" - -size_t (*hook_malloc_usable_size)(void *) = dlmalloc_usable_size; +#ifndef __SANITIZE_ADDRESS__ /** * Returns the number of bytes you can actually use in @@ -41,5 +39,7 @@ size_t (*hook_malloc_usable_size)(void *) = dlmalloc_usable_size; * @see dlmalloc_usable_size() */ size_t malloc_usable_size(void *p) { - return hook_malloc_usable_size(p); + return dlmalloc_usable_size(p); } + +#endif /* __SANITIZE_ADDRESS__ */ diff --git a/libc/mem/memalign.c b/libc/mem/memalign.c index adc367e81..114048477 100644 --- a/libc/mem/memalign.c +++ b/libc/mem/memalign.c @@ -16,11 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/hook.internal.h" #include "libc/mem/mem.h" #include "third_party/dlmalloc/dlmalloc.h" - -void *(*hook_memalign)(size_t, size_t) = dlmemalign; +#ifndef __SANITIZE_ADDRESS__ /** * Allocates aligned memory. @@ -36,5 +34,7 @@ void *(*hook_memalign)(size_t, size_t) = dlmemalign; * @see valloc(), pvalloc() */ void *memalign(size_t align, size_t bytes) { - return hook_memalign(align, bytes); + return dlmemalign(align, bytes); } + +#endif /* __SANITIZE_ADDRESS__ */ diff --git a/libc/mem/realloc.c b/libc/mem/realloc.c index 63481fa80..589ef2bfe 100644 --- a/libc/mem/realloc.c +++ b/libc/mem/realloc.c @@ -16,11 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/hook.internal.h" #include "libc/mem/mem.h" #include "third_party/dlmalloc/dlmalloc.h" - -void *(*hook_realloc)(void *, size_t) = dlrealloc; +#ifndef __SANITIZE_ADDRESS__ /** * Allocates / resizes / frees memory, e.g. @@ -61,5 +59,7 @@ void *(*hook_realloc)(void *, size_t) = dlrealloc; * @see dlrealloc() */ void *realloc(void *p, size_t n) { - return hook_realloc(p, n); + return dlrealloc(p, n); } + +#endif /* __SANITIZE_ADDRESS__ */ diff --git a/libc/mem/hook_realloc_in_place.c b/libc/mem/realloc_in_place.c similarity index 94% rename from libc/mem/hook_realloc_in_place.c rename to libc/mem/realloc_in_place.c index 5f47fee8b..514cfd046 100644 --- a/libc/mem/hook_realloc_in_place.c +++ b/libc/mem/realloc_in_place.c @@ -16,11 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/hook.internal.h" #include "libc/mem/mem.h" #include "third_party/dlmalloc/dlmalloc.h" - -void *(*hook_realloc_in_place)(void *, size_t) = dlrealloc_in_place; +#ifndef __SANITIZE_ADDRESS__ /** * Resizes the space allocated for p to size n, only if this can be @@ -38,5 +36,7 @@ void *(*hook_realloc_in_place)(void *, size_t) = dlrealloc_in_place; * @see dlrealloc_in_place() */ void *realloc_in_place(void *p, size_t n) { - return hook_realloc_in_place(p, n); + return dlrealloc_in_place(p, n); } + +#endif /* __SANITIZE_ADDRESS__ */ diff --git a/libc/mem/tinymalloc.inc b/libc/mem/tinymalloc.inc index 46ca7b888..3b04b0281 100644 --- a/libc/mem/tinymalloc.inc +++ b/libc/mem/tinymalloc.inc @@ -28,7 +28,7 @@ #define TINYMALLOC_MAX_ALIGN 4096 #endif -#ifndef MODE_DBG /* don't interfere with asan dlmalloc hooking */ +#ifndef MODE_DBG /* don't interfere with asan malloc */ alignas(TINYMALLOC_MAX_ALIGN) static struct { char memory[TINYMALLOC_MAX_BYTES]; diff --git a/libc/thread/BUILD.mk b/libc/thread/BUILD.mk index 7d452ce8c..5417244b8 100644 --- a/libc/thread/BUILD.mk +++ b/libc/thread/BUILD.mk @@ -34,6 +34,8 @@ LIBC_THREAD_A_DIRECTDEPS = \ LIBC_STR \ LIBC_SYSV \ LIBC_SYSV_CALLS \ + LIBC_TINYMATH \ + THIRD_PARTY_DLMALLOC \ THIRD_PARTY_NSYNC \ THIRD_PARTY_NSYNC_MEM diff --git a/libc/thread/pthread_create.c b/libc/thread/pthread_create.c index f0a44e40a..a4487325c 100644 --- a/libc/thread/pthread_create.c +++ b/libc/thread/pthread_create.c @@ -64,6 +64,10 @@ __static_yoink("_pthread_onfork_prepare"); __static_yoink("_pthread_onfork_parent"); __static_yoink("_pthread_onfork_child"); +/* #ifndef MODE_DBG */ +/* __static_yoink("threaded_dlmalloc"); */ +/* #endif */ + #define MAP_ANON_OPENBSD 0x1000 #define MAP_STACK_OPENBSD 0x4000 diff --git a/libc/thread/pthread_exit.c b/libc/thread/pthread_exit.c index ef40846d1..bf163980e 100644 --- a/libc/thread/pthread_exit.c +++ b/libc/thread/pthread_exit.c @@ -45,26 +45,23 @@ void _pthread_unwind(struct PosixThread *pt) { } void _pthread_unkey(struct CosmoTib *tib) { + void *val; int i, j, gotsome; - void *val, **keys; pthread_key_dtor dtor; - if ((keys = tib->tib_keys)) { - for (j = 0; j < PTHREAD_DESTRUCTOR_ITERATIONS; ++j) { - for (gotsome = i = 0; i < PTHREAD_KEYS_MAX; ++i) { - if ((val = keys[i]) && - (dtor = atomic_load_explicit(_pthread_key_dtor + i, - memory_order_relaxed)) && - dtor != (pthread_key_dtor)-1) { - gotsome = 1; - keys[i] = 0; - dtor(val); - } - } - if (!gotsome) { - break; + for (j = 0; j < PTHREAD_DESTRUCTOR_ITERATIONS; ++j) { + for (gotsome = i = 0; i < PTHREAD_KEYS_MAX; ++i) { + if ((val = tib->tib_keys[i]) && + (dtor = atomic_load_explicit(_pthread_key_dtor + i, + memory_order_relaxed)) && + dtor != (pthread_key_dtor)-1) { + gotsome = 1; + tib->tib_keys[i] = 0; + dtor(val); } } - free(keys); + if (!gotsome) { + break; + } } } @@ -131,23 +128,6 @@ wontreturn void pthread_exit(void *rc) { } } -#ifndef MODE_DBG - // free tls freelist - // - // 1. set lengths to -1 so free() thinks it's full - // 2. free globally by giving mallocs back to free - // - short freelen[32]; - static_assert(sizeof(freelen) == sizeof(tib->tib_freelen), ""); - memcpy(freelen, tib->tib_freelen, sizeof(freelen)); - memset(tib->tib_freelen, -1, sizeof(freelen)); - for (int i = 0; i < 32; ++i) { - if (freelen[i] > 0) { - free(tib->tib_freemem[i]); - } - } -#endif - // transition the thread to a terminated state status = atomic_load_explicit(&pt->pt_status, memory_order_acquire); do { diff --git a/libc/thread/pthread_getspecific.c b/libc/thread/pthread_getspecific.c index 3e432ff56..622a3888c 100644 --- a/libc/thread/pthread_getspecific.c +++ b/libc/thread/pthread_getspecific.c @@ -18,18 +18,10 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" #include "libc/intrin/atomic.h" -#include "libc/mem/mem.h" #include "libc/thread/posixthread.internal.h" #include "libc/thread/thread.h" #include "libc/thread/tls.h" -// this is a legacy api so we avoid making the tib 1024 bytes larger -static void pthread_key_init(void) { - if (!__get_tls()->tib_keys) { - __get_tls()->tib_keys = calloc(PTHREAD_KEYS_MAX, sizeof(void *)); - } -} - /** * Sets value of TLS slot for current thread. * @@ -43,7 +35,6 @@ int pthread_setspecific(pthread_key_t k, const void *val) { // pthread_key_create() or after key has been deleted with // pthread_key_delete() is undefined." // ──Quoth POSIX.1-2017 - pthread_key_init(); unassert(0 <= k && k < PTHREAD_KEYS_MAX); unassert(atomic_load_explicit(_pthread_key_dtor + k, memory_order_acquire)); __get_tls()->tib_keys[k] = (void *)val; @@ -63,7 +54,6 @@ void *pthread_getspecific(pthread_key_t k) { // pthread_key_create() or after key has been deleted with // pthread_key_delete() is undefined." // ──Quoth POSIX.1-2017 - pthread_key_init(); unassert(0 <= k && k < PTHREAD_KEYS_MAX); unassert(atomic_load_explicit(_pthread_key_dtor + k, memory_order_acquire)); return __get_tls()->tib_keys[k]; diff --git a/libc/thread/thread.h b/libc/thread/thread.h index b496ce664..7d4290b5a 100644 --- a/libc/thread/thread.h +++ b/libc/thread/thread.h @@ -1,7 +1,7 @@ #ifndef COSMOPOLITAN_LIBC_THREAD_THREAD_H_ #define COSMOPOLITAN_LIBC_THREAD_THREAD_H_ -#define PTHREAD_KEYS_MAX 128 +#define PTHREAD_KEYS_MAX 48 #define PTHREAD_STACK_MIN 65536 #define PTHREAD_DESTRUCTOR_ITERATIONS 4 @@ -44,8 +44,7 @@ COSMOPOLITAN_C_START_ #define PTHREAD_COND_INITIALIZER _PTHREAD_INIT #define PTHREAD_RWLOCK_INITIALIZER _PTHREAD_INIT #define PTHREAD_MUTEX_INITIALIZER _PTHREAD_INIT -#define _PTHREAD_INIT \ - { 0 } +#define _PTHREAD_INIT {0} typedef uintptr_t pthread_t; typedef int pthread_id_np_t; diff --git a/libc/thread/tls.h b/libc/thread/tls.h index 8b233a4c5..3957c921d 100644 --- a/libc/thread/tls.h +++ b/libc/thread/tls.h @@ -37,10 +37,8 @@ struct CosmoTib { char *tib_sigstack_addr; uint32_t tib_sigstack_size; uint32_t tib_sigstack_flags; - void **tib_keys; void *tib_nsync; - unsigned short tib_freelen[32]; - void *tib_freemem[32]; + void *tib_keys[48]; } __attribute__((__aligned__(64))); extern int __threaded; diff --git a/libc/tinymath/BUILD.mk b/libc/tinymath/BUILD.mk index 15493f253..cd6e5e8ed 100644 --- a/libc/tinymath/BUILD.mk +++ b/libc/tinymath/BUILD.mk @@ -63,7 +63,8 @@ $(LIBC_TINYMATH_A_OBJS): private \ -fno-associative-math \ -fno-finite-math-only \ -fno-cx-limited-range \ - -ffp-int-builtin-inexact + -ffp-int-builtin-inexact \ + -fno-sanitize=address LIBC_TINYMATH_LIBS = $(foreach x,$(LIBC_TINYMATH_ARTIFACTS),$($(x))) LIBC_TINYMATH_HDRS = $(foreach x,$(LIBC_TINYMATH_ARTIFACTS),$($(x)_HDRS)) diff --git a/libc/vga/BUILD.mk b/libc/vga/BUILD.mk index 55c801065..93f83b431 100644 --- a/libc/vga/BUILD.mk +++ b/libc/vga/BUILD.mk @@ -45,6 +45,8 @@ $(LIBC_VGA_A).pkg: \ $(LIBC_VGA_A_OBJS) \ $(foreach x,$(LIBC_VGA_A_DIRECTDEPS),$($(x)_A).pkg) +$(LIBC_VGA_A_OBJS): private COPTS += -fno-sanitize=address + LIBC_VGA_LIBS = $(foreach x,$(LIBC_VGA_ARTIFACTS),$($(x))) LIBC_VGA_SRCS = $(foreach x,$(LIBC_VGA_ARTIFACTS),$($(x)_SRCS)) LIBC_VGA_HDRS = $(foreach x,$(LIBC_VGA_ARTIFACTS),$($(x)_HDRS)) diff --git a/net/finger/BUILD.mk b/net/finger/BUILD.mk index fce253094..09b8c141f 100644 --- a/net/finger/BUILD.mk +++ b/net/finger/BUILD.mk @@ -22,6 +22,7 @@ NET_FINGER_A_CHECKS = \ NET_FINGER_A_DIRECTDEPS = \ LIBC_INTRIN \ + LIBC_MEM \ LIBC_NEXGEN32E NET_FINGER_A_DEPS := \ diff --git a/test/ctl/BUILD.mk b/test/ctl/BUILD.mk index 3c45b36e2..9b2b59c13 100644 --- a/test/ctl/BUILD.mk +++ b/test/ctl/BUILD.mk @@ -15,6 +15,7 @@ TEST_CTL_DIRECTDEPS = \ CTL \ LIBC_INTRIN \ LIBC_LOG \ + LIBC_MEM \ THIRD_PARTY_LIBCXX \ TEST_CTL_DEPS := \ diff --git a/test/libc/tinymath/magicu_test.c b/test/libc/intrin/magicu_test.c similarity index 91% rename from test/libc/tinymath/magicu_test.c rename to test/libc/intrin/magicu_test.c index cb3da8d6a..2458f378e 100644 --- a/test/libc/tinymath/magicu_test.c +++ b/test/libc/intrin/magicu_test.c @@ -16,13 +16,13 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/magicu.h" +#include "libc/intrin/magicu.h" #include "libc/limits.h" #include "libc/macros.internal.h" #include "libc/runtime/runtime.h" #include "libc/testlib/ezbench.h" #include "libc/testlib/testlib.h" -#include "libc/tinymath/magicu.h" +#include "libc/intrin/magicu.h" #define T uint32_t #define TBIT (sizeof(T) * CHAR_BIT - 1) @@ -47,6 +47,13 @@ TEST(magicu, test) { } } +TEST(magicu, max) { + ASSERT_EQ(0, __magicu_div(0, __magicu_get(-1))); + ASSERT_EQ(0, __magicu_div(1, __magicu_get(-1))); + ASSERT_EQ(0, __magicu_div(100, __magicu_get(-1))); + ASSERT_EQ(0, __magicu_div(-2, __magicu_get(-1))); +} + BENCH(magicu, bench) { struct magicu d = __magicu_get(UINT32_MAX); EZBENCH2("__magicu_get", donothing, __magicu_get(__veil("r", UINT32_MAX))); diff --git a/test/libc/mem/malloc_test.c b/test/libc/mem/malloc_test.c index ab5d1e7dc..96f52b289 100644 --- a/test/libc/mem/malloc_test.c +++ b/test/libc/mem/malloc_test.c @@ -23,6 +23,7 @@ #include "libc/errno.h" #include "libc/intrin/asan.internal.h" #include "libc/intrin/cxaatexit.internal.h" +#include "libc/intrin/kprintf.h" #include "libc/intrin/safemacros.internal.h" #include "libc/macros.internal.h" #include "libc/mem/gc.h" @@ -211,17 +212,17 @@ BENCH(bulk_free, bench) { #define ITERATIONS 10000 void *Worker(void *arg) { - for (int i = 0; i < ITERATIONS; ++i) { - char *p; - ASSERT_NE(NULL, (p = malloc(lemur64() % 128))); - ASSERT_NE(NULL, (p = realloc(p, max(lemur64() % 128, 1)))); - free(p); - } + /* for (int i = 0; i < ITERATIONS; ++i) { */ + /* char *p; */ + /* ASSERT_NE(NULL, (p = malloc(lemur64() % 128))); */ + /* ASSERT_NE(NULL, (p = realloc(p, max(lemur64() % 128, 1)))); */ + /* free(p); */ + /* } */ return 0; } BENCH(malloc, torture) { - int i, n = __get_cpu_count() * 2; + int i, n = __get_cpu_count(); pthread_t *t = gc(malloc(sizeof(pthread_t) * n)); if (!n) return; diff --git a/test/libc/mem/thread_test.cc b/test/libc/mem/thread_test.cc deleted file mode 100644 index 9cc845985..000000000 --- a/test/libc/mem/thread_test.cc +++ /dev/null @@ -1,79 +0,0 @@ -/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│ -│ vi: set et ft=c++ ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2024 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/thread/thread.h" -#include "libc/assert.h" -#include "libc/calls/calls.h" -#include "libc/calls/struct/timespec.h" -#include "libc/fmt/itoa.h" -#include "libc/macros.internal.h" -#include "libc/runtime/runtime.h" -#include "libc/stdio/rand.h" -#include "libc/stdio/stdio.h" -#include "libc/str/str.h" - -// -// BEFORE ADDING TLS FREELIST -// -// malloc multithreading torture test -// with 192 threads and 10000 iterations -// consumed 0.084721 wall and 0.141747 cpu seconds -// -// AFTER ADDING TLS FREELIST -// -// malloc multithreading torture test -// with 192 threads and 10000 iterations -// consumed 0.035193 wall and 4.34012 cpu seconds -// - -#define ITERATIONS 10000 - -void *Worker(void *arg) { - char *thing[32] = {}; - for (int i = 0; i < ITERATIONS; ++i) { - int r = rand(); - int j = r % ARRAYLEN(thing); - if (thing[j]) { - delete[] thing[j]; - thing[j] = 0; - } else { - thing[j] = new char[12 + ((r >> 8) % 32)]; - } - } - return 0; -} - -int main(int argc, char *argv[]) { - int n = __get_cpu_count(); - pthread_t *t = new pthread_t[n]; - fprintf(stderr, - "\n" - "malloc multithreading torture test\n" - "with %d threads and %d iterations\n", - n, ITERATIONS); - struct timespec t1 = timespec_real(); - for (int i = 0; i < n; ++i) - unassert(!pthread_create(t + i, 0, Worker, 0)); - for (int i = 0; i < n; ++i) - unassert(!pthread_join(t[i], 0)); - struct timespec t2 = timespec_real(); - fprintf(stderr, "consumed %g wall and %g cpu seconds\n", - timespec_tomicros(timespec_sub(t2, t1)) * 1e-6, - (double)clock() / CLOCKS_PER_SEC); - delete[] t; -} diff --git a/test/math/BUILD.mk b/test/math/BUILD.mk index 32d9e93dc..956672e9c 100644 --- a/test/math/BUILD.mk +++ b/test/math/BUILD.mk @@ -13,6 +13,7 @@ TEST_MATH_CHECKS = $(TEST_MATH_SRCS_TEST:%.c=o/$(MODE)/%.runs) TEST_MATH_DIRECTDEPS = \ LIBC_INTRIN \ + LIBC_MEM \ LIBC_RUNTIME \ LIBC_SYSV \ LIBC_TINYMATH \ diff --git a/third_party/compiler_rt/BUILD.mk b/third_party/compiler_rt/BUILD.mk index dc933ee62..1bf28577c 100644 --- a/third_party/compiler_rt/BUILD.mk +++ b/third_party/compiler_rt/BUILD.mk @@ -46,7 +46,8 @@ $(THIRD_PARTY_COMPILER_RT_A).pkg: \ $(THIRD_PARTY_COMPILER_RT_A_OBJS): private \ DEFAULT_CFLAGS += \ $(OLD_CODE) \ - -DCRT_HAS_128BIT + -DCRT_HAS_128BIT \ + -fno-sanitize=address # these assembly files are safe to build on aarch64 o/$(MODE)/third_party/compiler_rt/comprt.o: third_party/compiler_rt/comprt.S diff --git a/third_party/dlmalloc/dlmalloc.c b/third_party/dlmalloc/dlmalloc.c index fb7eac2a1..81c00116a 100644 --- a/third_party/dlmalloc/dlmalloc.c +++ b/third_party/dlmalloc/dlmalloc.c @@ -26,25 +26,29 @@ #include "libc/thread/tls.h" #include "third_party/nsync/mu.h" +#if !IsTiny() +#define FOOTERS 1 +#define MSPACES 1 +#define ONLY_MSPACES 1 // enables scalable multi-threaded malloc +#else +#define INSECURE 1 +#define PROCEED_ON_ERROR 1 #define FOOTERS 0 #define MSPACES 0 +#define ONLY_MSPACES 0 +#endif #define HAVE_MMAP 1 #define HAVE_MREMAP 0 #define HAVE_MORECORE 0 #define USE_LOCKS 2 -#define USE_SPIN_LOCKS 0 +#define USE_SPIN_LOCKS 1 #define MORECORE_CONTIGUOUS 0 #define MALLOC_INSPECT_ALL 1 #define ABORT_ON_ASSERT_FAILURE 0 #define LOCK_AT_FORK 1 #define NO_MALLOC_STATS 1 -#if IsTiny() -#define INSECURE 1 -#define PROCEED_ON_ERROR 1 -#endif - #if IsModeDbg() #define DEBUG 1 #endif @@ -56,24 +60,29 @@ #define assert(x) if(!(x)) ABORT #endif -#include "third_party/dlmalloc/platform.inc" -#include "third_party/dlmalloc/locks.inc" -#include "third_party/dlmalloc/chunks.inc" -#include "third_party/dlmalloc/headfoot.inc" -#include "third_party/dlmalloc/global.inc" -#include "third_party/dlmalloc/system.inc" -#include "third_party/dlmalloc/hooks.inc" -#include "third_party/dlmalloc/debugging.inc" -#include "third_party/dlmalloc/indexing.inc" -#include "third_party/dlmalloc/binmaps.inc" -#include "third_party/dlmalloc/runtimechecks.inc" -#include "third_party/dlmalloc/init.inc" -#include "third_party/dlmalloc/debuglib.inc" -#include "third_party/dlmalloc/statistics.inc" -#include "third_party/dlmalloc/smallbins.inc" -#include "third_party/dlmalloc/directmap.inc" -#include "third_party/dlmalloc/trees.inc" -#include "third_party/dlmalloc/management.inc" +#include "platform.inc" +#include "locks.inc" +#include "chunks.inc" +#include "headfoot.inc" + +#if ONLY_MSPACES +#include "threaded.inc" +#endif + +#include "global.inc" +#include "system.inc" +#include "hooks.inc" +#include "debugging.inc" +#include "indexing.inc" +#include "binmaps.inc" +#include "runtimechecks.inc" +#include "init.inc" +#include "debuglib.inc" +#include "statistics.inc" +#include "smallbins.inc" +#include "directmap.inc" +#include "trees.inc" +#include "management.inc" /* -------------------------- System allocation -------------------------- */ @@ -585,29 +594,7 @@ static void* tmalloc_small(mstate m, size_t nb) { #if !ONLY_MSPACES -#define FREEBIE_COUNT 32 -#define FREEBIE_MAXSIZE 2048 - -void* dlmalloc(size_t bytes) { - -#if FREEBIE_COUNT && !defined(MODE_DBG) - /* Allocate from thread-local freelist. */ - if (__threaded && bytes && bytes <= FREEBIE_MAXSIZE) { - unsigned need = bytes; - unsigned best_index = FREEBIE_COUNT; - unsigned best_delta = FREEBIE_MAXSIZE + 1; - struct CosmoTib *tib = __get_tls(); - for (int i = 0; i < FREEBIE_COUNT; ++i) { - unsigned d = tib->tib_freelen[i] - need; - best_index = d < best_delta ? i : best_index; - best_delta = d < best_delta ? d : best_delta; - } - if (best_index < FREEBIE_COUNT) { - tib->tib_freelen[best_index] = 0; - return tib->tib_freemem[best_index]; - } - } -#endif +void* dlmalloc_single(size_t bytes) { /* Basic algorithm: @@ -769,26 +756,6 @@ void dlfree(void* mem) { #define fm gm #endif /* FOOTERS */ -#if FREEBIE_COUNT && !defined(MODE_DBG) - /* Free small allocations locally. */ - if (__threaded) { - struct CosmoTib *tib = __get_tls(); - for (int i = 0; i < FREEBIE_COUNT; ++i) { - if (!tib->tib_freelen[i]) { - if (is_inuse(p)) { - size_t len = chunksize(p) - overhead_for(p); - if (len && len < FREEBIE_MAXSIZE) { - tib->tib_freelen[i] = len; - tib->tib_freemem[i] = mem; - return; - } - } - break; - } - } - } -#endif - /* Otherwise free memory globally. */ if (!PREACTION(fm)) { check_inuse_chunk(fm, p); @@ -881,7 +848,7 @@ void dlfree(void* mem) { #endif /* FOOTERS */ } -void* dlcalloc(size_t n_elements, size_t elem_size) { +void* dlcalloc_single(size_t n_elements, size_t elem_size) { void* mem; size_t req = 0; if (ckd_mul(&req, n_elements, elem_size)) req = -1; @@ -977,10 +944,10 @@ static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb, static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { void* mem = 0; - if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ + if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ alignment = MIN_CHUNK_SIZE; /* alignment is 32+ bytes rounded up to nearest two power */ - alignment = 2ul << bsrl(MAX(MIN_CHUNK_SIZE, alignment) - 1); + alignment = 2ul << bsrl(alignment - 1); if (bytes >= MAX_REQUEST - alignment) { if (m != 0) { /* Test isn't needed but avoids compiler warning */ MALLOC_FAILURE_ACTION; @@ -1267,7 +1234,7 @@ static void internal_inspect_all(mstate m, #if !ONLY_MSPACES -void* dlrealloc(void* oldmem, size_t bytes) { +void* dlrealloc_single(void* oldmem, size_t bytes) { void* mem = 0; if (oldmem == 0) { mem = dlmalloc(bytes); @@ -1343,7 +1310,7 @@ void* dlrealloc_in_place(void* oldmem, size_t bytes) { return mem; } -void* dlmemalign(size_t alignment, size_t bytes) { +void* dlmemalign_single(size_t alignment, size_t bytes) { if (alignment <= MALLOC_ALIGNMENT) { return dlmalloc(bytes); } @@ -1421,7 +1388,7 @@ size_t dlmalloc_set_footprint_limit(size_t bytes) { } #if !NO_MALLINFO -struct mallinfo dlmallinfo(void) { +struct mallinfo dlmallinfo_single(void) { return internal_mallinfo(gm); } #endif /* NO_MALLINFO */ @@ -1454,6 +1421,20 @@ size_t dlmalloc_usable_size(void* mem) { #endif /* !ONLY_MSPACES */ +#if ONLY_MSPACES +void *(*dlmalloc)(size_t); +void *(*dlcalloc)(size_t, size_t); +void *(*dlrealloc)(void *, size_t); +void *(*dlmemalign)(size_t, size_t); +struct mallinfo (*dlmallinfo)(void); +#else +void *(*dlmalloc)(size_t) = dlmalloc_single; +void *(*dlcalloc)(size_t, size_t) = dlcalloc_single; +void *(*dlrealloc)(void *, size_t) = dlrealloc_single; +void *(*dlmemalign)(size_t, size_t) = dlmemalign_single; +struct mallinfo (*dlmallinfo)(void) = dlmallinfo_single; +#endif + /* ----------------------------- user mspaces ---------------------------- */ #if MSPACES diff --git a/third_party/dlmalloc/dlmalloc.h b/third_party/dlmalloc/dlmalloc.h index 5ef2d1be6..edb86f27a 100644 --- a/third_party/dlmalloc/dlmalloc.h +++ b/third_party/dlmalloc/dlmalloc.h @@ -25,6 +25,28 @@ #define dlrealloc_in_place __dlrealloc_in_place #define dlrealloc_in_place __dlrealloc_in_place +#define create_mspace_with_base __create_mspace_with_base +#define mspace_bulk_free __mspace_bulk_free +#define mspace_calloc __mspace_calloc +#define mspace_footprint __mspace_footprint +#define mspace_footprint_limit __mspace_footprint_limit +#define mspace_free __mspace_free +#define mspace_independent_calloc __mspace_independent_calloc +#define mspace_independent_comalloc __mspace_independent_comalloc +#define mspace_inspect_all __mspace_inspect_all +#define mspace_mallinfo __mspace_mallinfo +#define mspace_malloc __mspace_malloc +#define mspace_malloc_stats __mspace_malloc_stats +#define mspace_mallopt __mspace_mallopt +#define mspace_max_footprint __mspace_max_footprint +#define mspace_memalign __mspace_memalign +#define mspace_realloc __mspace_realloc +#define mspace_realloc_in_place __mspace_realloc_in_place +#define mspace_set_footprint_limit __mspace_set_footprint_limit +#define mspace_track_large_chunks __mspace_track_large_chunks +#define mspace_trim __mspace_trim +#define mspace_usable_size __mspace_usable_size + COSMOPOLITAN_C_START_ /* @@ -41,7 +63,7 @@ COSMOPOLITAN_C_START_ maximum supported value of n differs across systems, but is in all cases less than the maximum representable value of a size_t. */ -void* dlmalloc(size_t); +extern void* (*dlmalloc)(size_t); /* free(void* p) @@ -57,7 +79,7 @@ void dlfree(void*); Returns a pointer to n_elements * element_size bytes, with all locations set to zero. */ -void* dlcalloc(size_t, size_t); +extern void* (*dlcalloc)(size_t, size_t); /* realloc(void* p, size_t n) @@ -81,7 +103,7 @@ void* dlcalloc(size_t, size_t); The old unix realloc convention of allowing the last-free'd chunk to be used as an argument to realloc is not supported. */ -void* dlrealloc(void*, size_t); +extern void* (*dlrealloc)(void*, size_t); /* realloc_in_place(void* p, size_t n) @@ -110,7 +132,7 @@ void* dlrealloc_in_place(void*, size_t); Overreliance on memalign is a sure way to fragment space. */ -void* dlmemalign(size_t, size_t); +extern void* (*dlmemalign)(size_t, size_t); /* mallopt(int parameter_number, int parameter_value) @@ -233,7 +255,7 @@ void dlmalloc_inspect_all(void (*handler)(void*, void*, size_t, void*), thus be inaccurate. */ -struct mallinfo dlmallinfo(void); +extern struct mallinfo (*dlmallinfo)(void); /* independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); diff --git a/third_party/dlmalloc/init.inc b/third_party/dlmalloc/init.inc index 684e68995..264700f1c 100644 --- a/third_party/dlmalloc/init.inc +++ b/third_party/dlmalloc/init.inc @@ -2,13 +2,38 @@ /* ---------------------------- setting mparams -------------------------- */ #if LOCK_AT_FORK +#if ONLY_MSPACES + +static void dlmalloc_pre_fork(void) { + mstate h; + for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i) + if ((h = atomic_load_explicit(&g_heaps[i], memory_order_acquire))) + ACQUIRE_LOCK(&h->mutex); +} + +static void dlmalloc_post_fork_parent(void) { + mstate h; + for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i) + if ((h = atomic_load_explicit(&g_heaps[i], memory_order_acquire))) + RELEASE_LOCK(&h->mutex); +} + +static void dlmalloc_post_fork_child(void) { + mstate h; + for (unsigned i = 0; i < ARRAYLEN(g_heaps); ++i) + if ((h = atomic_load_explicit(&g_heaps[i], memory_order_acquire))) + (void)INITIAL_LOCK(&h->mutex); +} + +#else static void dlmalloc_pre_fork(void) { ACQUIRE_LOCK(&(gm)->mutex); } static void dlmalloc_post_fork_parent(void) { RELEASE_LOCK(&(gm)->mutex); } static void dlmalloc_post_fork_child(void) { (void)INITIAL_LOCK(&(gm)->mutex); } +#endif /* ONLY_MSPACES */ #endif /* LOCK_AT_FORK */ /* Initialize mparams */ -__attribute__((__constructor__(50))) int init_mparams(void) { +__attribute__((__constructor__(49))) int init_mparams(void) { #ifdef NEED_GLOBAL_LOCK_INIT if (malloc_global_mutex_status <= 0) init_malloc_global_mutex(); @@ -95,6 +120,10 @@ __attribute__((__constructor__(50))) int init_mparams(void) { // RELEASE_MALLOC_GLOBAL_LOCK(); +#if ONLY_MSPACES + threaded_dlmalloc(); +#endif + __runlevel = RUNLEVEL_MALLOC; return 1; } diff --git a/third_party/dlmalloc/locks.inc b/third_party/dlmalloc/locks.inc index b74887916..037442ac5 100644 --- a/third_party/dlmalloc/locks.inc +++ b/third_party/dlmalloc/locks.inc @@ -78,7 +78,7 @@ static int malloc_unlock(MLOCK_T *lk) { #define ACQUIRE_LOCK(lk) malloc_lock(lk) #define RELEASE_LOCK(lk) malloc_unlock(lk) #define INITIAL_LOCK(lk) malloc_wipe(lk) -#define DESTROY_LOCK(lk) +#define DESTROY_LOCK(lk) malloc_wipe(lk) #define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); #define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); diff --git a/third_party/dlmalloc/mspaces.inc b/third_party/dlmalloc/mspaces.inc index a9355d29a..1f048d0eb 100644 --- a/third_party/dlmalloc/mspaces.inc +++ b/third_party/dlmalloc/mspaces.inc @@ -5,7 +5,7 @@ static mstate init_user_mstate(char* tbase, size_t tsize) { mchunkptr mn; mchunkptr msp = align_as_chunk(tbase); mstate m = (mstate)(chunk2mem(msp)); - bzero(m, msize); + // bzero(m, msize); // [jart] it is not needed (void)INITIAL_LOCK(&m->mutex); msp->head = (msize|INUSE_BITS); m->seg.base = m->least_addr = tbase; @@ -32,7 +32,7 @@ mspace create_mspace(size_t capacity, int locked) { size_t rs = ((capacity == 0)? mparams.granularity : (capacity + TOP_FOOT_SIZE + msize)); size_t tsize = granularity_align(rs); - char* tbase = (char*)(dlmalloc_requires_more_vespene_gas(tsize)); + char* tbase = (char*)dlmalloc_requires_more_vespene_gas(tsize); if (tbase != CMFAIL) { m = init_user_mstate(tbase, tsize); m->seg.sflags = USE_MMAP_BIT; diff --git a/third_party/dlmalloc/threaded.inc b/third_party/dlmalloc/threaded.inc new file mode 100644 index 000000000..d13e7ca0a --- /dev/null +++ b/third_party/dlmalloc/threaded.inc @@ -0,0 +1,200 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" +#include "libc/intrin/magicu.h" +#include "libc/intrin/strace.internal.h" +#include "libc/intrin/weaken.h" +#include "libc/macros.internal.h" +#include "libc/nexgen32e/rdtscp.h" +#include "libc/nexgen32e/x86feature.h" +#include "libc/runtime/runtime.h" +#include "libc/thread/thread.h" +#include "third_party/dlmalloc/dlmalloc.h" + +#if !FOOTERS || !MSPACES +#error "threaded dlmalloc needs footers and mspaces" +#endif + +static struct magicu magiu; +static unsigned g_heapslen; +static mstate g_heaps[128]; + +void dlfree(void *p) { + return mspace_free(0, p); +} + +size_t dlmalloc_usable_size(void* mem) { + return mspace_usable_size(mem); +} + +void* dlrealloc_in_place(void *p, size_t n) { + return mspace_realloc_in_place(0, p, n); +} + +int dlmallopt(int param_number, int value) { + return mspace_mallopt(param_number, value); +} + +int dlmalloc_trim(size_t pad) { + int got_some = 0; + for (unsigned i = 0; i < g_heapslen; ++i) + if (g_heaps[i]) + got_some |= mspace_trim(g_heaps[i], pad); + return got_some; +} + +size_t dlbulk_free(void *array[], size_t nelem) { + for (size_t i = 0; i < nelem; ++i) + mspace_free(0, array[i]); + return 0; +} + +void dlmalloc_inspect_all(void handler(void *start, void *end, + size_t used_bytes, void *callback_arg), + void *arg) { + for (unsigned i = 0; i < g_heapslen; ++i) + if (g_heaps[i]) + mspace_inspect_all(g_heaps[i], handler, arg); +} + +forceinline mstate get_arena(void) { + unsigned cpu; +#ifdef __x86_64__ + unsigned tsc_aux; + rdtscp(&tsc_aux); + cpu = TSC_AUX_CORE(tsc_aux); +#else + long tpidr_el0; + asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0)); + cpu = tpidr_el0 & 255; +#endif + return g_heaps[__magicu_div(cpu, magiu) % g_heapslen]; +} + +static void *dlmalloc_single(size_t n) { + return mspace_malloc(g_heaps[0], n); +} + +static void *dlmalloc_threaded(size_t n) { + return mspace_malloc(get_arena(), n); +} + +static void *dlcalloc_single(size_t n, size_t z) { + return mspace_calloc(g_heaps[0], n, z); +} + +static void *dlcalloc_threaded(size_t n, size_t z) { + return mspace_calloc(get_arena(), n, z); +} + +static void *dlrealloc_single(void *p, size_t n) { + return mspace_realloc(g_heaps[0], p, n); +} + +static void *dlrealloc_threaded(void *p, size_t n) { + if (p) + return mspace_realloc(0, p, n); + else + return mspace_malloc(get_arena(), n); +} + +static void *dlmemalign_single(size_t a, size_t n) { + return mspace_memalign(g_heaps[0], a, n); +} + +static void *dlmemalign_threaded(size_t a, size_t n) { + return mspace_memalign(get_arena(), a, n); +} + +static struct mallinfo dlmallinfo_single(void) { + return mspace_mallinfo(g_heaps[0]); +} + +static struct mallinfo dlmallinfo_threaded(void) { + return mspace_mallinfo(get_arena()); +} + +static int dlmalloc_atoi(const char *s) { + int c, x = 0; + while ((c = *s++)) { + x *= 10; + x += c - '0'; + } + return x; +} + +static void use_single_heap(bool uses_locks) { + g_heapslen = 1; + dlmalloc = dlmalloc_single; + dlcalloc = dlcalloc_single; + dlrealloc = dlrealloc_single; + dlmemalign = dlmemalign_single; + dlmallinfo = dlmallinfo_single; + if (!(g_heaps[0] = create_mspace(0, uses_locks))) + __builtin_trap(); +} + +static void threaded_dlmalloc(void) { + int heaps, cpus; + const char *var; + + if (!_weaken(pthread_create)) + return use_single_heap(false); + + if (!IsAarch64() && !X86_HAVE(RDTSCP)) + return use_single_heap(true); + + // determine how many independent heaps we should install + // by default we do an approximation of one heap per core + // this code makes the c++ stl go 164x faster on my ryzen + cpus = __get_cpu_count(); + if (cpus == -1) + heaps = 1; + else if (!IsAarch64() && !X86_HAVE(RDTSCP)) + heaps = 1; + else if ((var = getenv("COSMOPOLITAN_HEAP_COUNT"))) + heaps = dlmalloc_atoi(var); + else + heaps = cpus >> 1; + if (heaps <= 1) + return use_single_heap(true); + if (heaps > ARRAYLEN(g_heaps)) + heaps = ARRAYLEN(g_heaps); + + // find 𝑑 such that sched_getcpu() / 𝑑 is within [0,heaps) + // turn 𝑑 into a fast magic that can divide by multiplying + magiu = __magicu_get(cpus / heaps); + + // we need this too due to linux's cpu count affinity hack + g_heapslen = heaps; + + // create the arenas + for (size_t i = 0; i < g_heapslen; ++i) + if (!(g_heaps[i] = create_mspace(0, true))) + __builtin_trap(); + + // install function pointers + dlmalloc = dlmalloc_threaded; + dlcalloc = dlcalloc_threaded; + dlrealloc = dlrealloc_threaded; + dlmemalign = dlmemalign_threaded; + dlmallinfo = dlmallinfo_threaded; + + STRACE("created %d dlmalloc arenas for %d cpus", heaps, cpus); +} diff --git a/third_party/double-conversion/BUILD.mk b/third_party/double-conversion/BUILD.mk index 0f80daa39..10da7f072 100644 --- a/third_party/double-conversion/BUILD.mk +++ b/third_party/double-conversion/BUILD.mk @@ -31,6 +31,7 @@ THIRD_PARTY_DOUBLECONVERSION_A_CHECKS = \ THIRD_PARTY_DOUBLECONVERSION_A_DIRECTDEPS = \ LIBC_INTRIN \ + LIBC_MEM \ LIBC_STR \ LIBC_TINYMATH \ THIRD_PARTY_LIBCXXABI diff --git a/third_party/double-conversion/test/BUILD.mk b/third_party/double-conversion/test/BUILD.mk index b601bd7c6..41a5bc39c 100644 --- a/third_party/double-conversion/test/BUILD.mk +++ b/third_party/double-conversion/test/BUILD.mk @@ -42,6 +42,7 @@ THIRD_PARTY_DOUBLECONVERSION_TEST_DIRECTDEPS = \ LIBC_NEXGEN32E \ LIBC_RUNTIME \ LIBC_FMT \ + LIBC_MEM \ LIBC_SYSV \ LIBC_STR \ LIBC_TINYMATH \ diff --git a/third_party/double-conversion/test/cctest.h b/third_party/double-conversion/test/cctest.h index 0ab162b76..41947b667 100644 --- a/third_party/double-conversion/test/cctest.h +++ b/third_party/double-conversion/test/cctest.h @@ -26,6 +26,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef CCTEST_H_ #define CCTEST_H_ +#include "libc/stdio/stdio.h" #include "third_party/double-conversion/utils.h" #ifndef TEST diff --git a/third_party/getopt/BUILD.mk b/third_party/getopt/BUILD.mk index 7e6b1e0cb..535e43fd7 100644 --- a/third_party/getopt/BUILD.mk +++ b/third_party/getopt/BUILD.mk @@ -34,6 +34,8 @@ $(THIRD_PARTY_GETOPT_A).pkg: \ $(THIRD_PARTY_GETOPT_A_OBJS) \ $(foreach x,$(THIRD_PARTY_GETOPT_A_DIRECTDEPS),$($(x)_A).pkg) +$(THIRD_PARTY_GETOPT_A_OBJS): private COPTS += -fno-sanitize=address + THIRD_PARTY_GETOPT_LIBS = $(foreach x,$(THIRD_PARTY_GETOPT_ARTIFACTS),$($(x))) THIRD_PARTY_GETOPT_SRCS = $(foreach x,$(THIRD_PARTY_GETOPT_ARTIFACTS),$($(x)_SRCS)) THIRD_PARTY_GETOPT_HDRS = $(foreach x,$(THIRD_PARTY_GETOPT_ARTIFACTS),$($(x)_HDRS)) diff --git a/third_party/libunwind/BUILD.mk b/third_party/libunwind/BUILD.mk index 6f006bd2e..242d4f8d1 100644 --- a/third_party/libunwind/BUILD.mk +++ b/third_party/libunwind/BUILD.mk @@ -70,15 +70,7 @@ $(THIRD_PARTY_LIBUNWIND_A).pkg: \ $(foreach x,$(THIRD_PARTY_LIBUNWIND_A_DIRECTDEPS),$($(x)_A).pkg) $(THIRD_PARTY_LIBUNWIND_A_OBJS): private \ - CFLAGS += \ - -fexceptions \ - -fno-sanitize=all \ - -ffunction-sections \ - -fdata-sections \ - -D_LIBUNWIND_USE_DLADDR=0 - -$(THIRD_PARTY_LIBUNWIND_A_OBJS): private \ - CXXFLAGS += \ + COPTS += \ -fexceptions \ -fno-sanitize=all \ -ffunction-sections \ diff --git a/third_party/lua/BUILD.mk b/third_party/lua/BUILD.mk index a08cae0c4..e4dc7d7d8 100644 --- a/third_party/lua/BUILD.mk +++ b/third_party/lua/BUILD.mk @@ -230,6 +230,7 @@ THIRD_PARTY_LUA_LUA_DIRECTDEPS = \ LIBC_NEXGEN32E \ LIBC_STDIO \ LIBC_LOG \ + LIBC_MEM \ LIBC_STR \ LIBC_SYSV \ LIBC_THREAD \ @@ -262,6 +263,7 @@ THIRD_PARTY_LUA_LUAC_DIRECTDEPS = \ LIBC_NEXGEN32E \ LIBC_RUNTIME \ LIBC_STDIO \ + LIBC_MEM \ LIBC_STR \ LIBC_SYSV \ THIRD_PARTY_LUA \ diff --git a/third_party/python/Objects/obmalloc.c b/third_party/python/Objects/obmalloc.c index e9cf4fd54..47a793fec 100644 --- a/third_party/python/Objects/obmalloc.c +++ b/third_party/python/Objects/obmalloc.c @@ -95,13 +95,6 @@ static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size); static inline void * _PyMem_RawMalloc(void *ctx, size_t size) { -#ifdef __COSMOPOLITAN__ -#ifdef __SANITIZE_ADDRESS__ - return __asan_memalign(16, size); -#else - return dlmalloc(size); -#endif -#else /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL for malloc(0), which would be treated as an error. Some platforms would return a pointer with no memory behind it, which would break pymalloc. @@ -109,19 +102,11 @@ _PyMem_RawMalloc(void *ctx, size_t size) if (size == 0) size = 1; return malloc(size); -#endif } static inline void * _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize) { -#ifdef __COSMOPOLITAN__ -#ifdef __SANITIZE_ADDRESS__ - return __asan_calloc(nelem, elsize); -#else - return dlcalloc(nelem, elsize); -#endif -#else /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL for calloc(0, 0), which would be treated as an error. Some platforms would return a pointer with no memory behind it, which would break @@ -131,7 +116,6 @@ _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize) elsize = 1; } return calloc(nelem, elsize); -#endif } static inline void * @@ -139,29 +123,13 @@ _PyMem_RawRealloc(void *ctx, void *ptr, size_t size) { if (size == 0) size = 1; -#ifdef __COSMOPOLITAN__ -#ifdef __SANITIZE_ADDRESS__ - return __asan_realloc(ptr, size); -#else - return dlrealloc(ptr, size); -#endif -#else return realloc(ptr, size); -#endif } static inline void _PyMem_RawFree(void *ctx, void *ptr) { -#ifdef __COSMOPOLITAN__ -#ifdef __SANITIZE_ADDRESS__ - __asan_free(ptr); -#else - dlfree(ptr); -#endif -#else free(ptr); -#endif } #ifdef MS_WINDOWS