Improve memory safety

This commit makes numerous refinements to cosmopolitan memory handling.

The default stack size has been reduced from 2mb to 128kb. A new macro
is now provided so you can easily reconfigure the stack size to be any
value you want. Work around the breaking change by adding to your main:

    STATIC_STACK_SIZE(0x00200000);  // 2mb stack

If you're not sure how much stack you need, then you can use:

    STATIC_YOINK("stack_usage_logging");

After which you can `sort -nr o/$MODE/stack.log`. Based on the unit test
suite, nothing in the Cosmopolitan repository (except for Python) needs
a stack size greater than 30kb. There are also new macros for detecting
the size and address of the stack at runtime, e.g. GetStackAddr(). We
also now support sigaltstack() so if you want to see nice looking crash
reports whenever a stack overflow happens, you can put this in main():

    ShowCrashReports();

Under `make MODE=dbg` and `make MODE=asan` the unit testing framework
will now automatically print backtraces of memory allocations when
things like memory leaks happen. Bugs are now fixed in ASAN global
variable overrun detection. The memtrack and asan runtimes also handle
edge cases now. The new tools helped to identify a few memory leaks,
which are fixed by this change.

This change should fix an issue reported in #288 with ARG_MAX limits.
Fixing this doubled the performance of MKDEPS.COM and AR.COM yet again.
This commit is contained in:
Justine Tunney 2021-10-13 17:27:13 -07:00
parent a0b39f886c
commit 226aaf3547
317 changed files with 6474 additions and 3993 deletions

View file

@@ -16,16 +16,16 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/thread/attr.h"
#include "libc/errno.h"
#include "libc/thread/attr.h"
#define MIN_STACKSIZE (8*PAGESIZE)
#define MIN_STACKSIZE (8 * PAGESIZE)
#define MIN_GUARDSIZE PAGESIZE
// CTOR/DTOR
int cthread_attr_init(cthread_attr_t* attr) {
attr->stacksize = 1024*PAGESIZE; // 4 MiB
attr->guardsize = 16*PAGESIZE; // 64 KiB
attr->stacksize = 1024 * PAGESIZE; // 4 MiB
attr->guardsize = 16 * PAGESIZE; // 64 KiB
attr->mode = CTHREAD_CREATE_JOINABLE;
return 0;
}
@@ -36,7 +36,7 @@ int cthread_attr_destroy(cthread_attr_t* attr) {
// stacksize
int cthread_attr_setstacksize(cthread_attr_t* attr, size_t size) {
if (size & (PAGESIZE-1)) return EINVAL;
if (size & (PAGESIZE - 1)) return EINVAL;
if (size < MIN_STACKSIZE) return EINVAL;
attr->stacksize = size;
return 0;
@@ -47,7 +47,7 @@ size_t cthread_attr_getstacksize(const cthread_attr_t* attr) {
// guardsize
int cthread_attr_setguardsize(cthread_attr_t* attr, size_t size) {
if (size & (PAGESIZE-1)) return EINVAL;
if (size & (PAGESIZE - 1)) return EINVAL;
if (size < MIN_GUARDSIZE) return EINVAL;
attr->guardsize = size;
return 0;
@@ -58,7 +58,8 @@ size_t cthread_attr_getguardsize(const cthread_attr_t* attr) {
// detachstate
int cthread_attr_setdetachstate(cthread_attr_t* attr, int mode) {
if (mode & ~(CTHREAD_CREATE_JOINABLE | CTHREAD_CREATE_DETACHED)) return EINVAL;
if (mode & ~(CTHREAD_CREATE_JOINABLE | CTHREAD_CREATE_DETACHED))
return EINVAL;
attr->mode = mode;
return 0;
}

View file

@@ -1,5 +1,9 @@
#ifndef COSMOPOLITAN_LIBC_THREAD_ATTR_H_
#define COSMOPOLITAN_LIBC_THREAD_ATTR_H_
#define CTHREAD_CREATE_DETACHED 1
#define CTHREAD_CREATE_JOINABLE 0
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
@@ -7,27 +11,17 @@ COSMOPOLITAN_C_START_
* @fileoverview cosmopolitan thread attributes
*/
#define CTHREAD_CREATE_DETACHED 1
#define CTHREAD_CREATE_JOINABLE 0
typedef struct cthread_attr_t {
size_t stacksize, guardsize;
int mode;
size_t stacksize, guardsize;
int mode;
} cthread_attr_t;
// CTOR/DTOR
int cthread_attr_init(cthread_attr_t*);
int cthread_attr_destroy(cthread_attr_t*);
// stacksize
int cthread_attr_setstacksize(cthread_attr_t*, size_t);
size_t thread_attr_getstacksize(const cthread_attr_t*);
// guardsize
int cthread_attr_setguardsize(cthread_attr_t*, size_t);
size_t cthread_attr_getguardsize(const cthread_attr_t*);
// detachstate
int cthread_attr_setdetachstate(cthread_attr_t*, int);
int cthread_attr_getdetachstate(const cthread_attr_t*);

View file

@@ -16,41 +16,44 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/thread/create.h"
#include "libc/errno.h"
#include "libc/linux/clone.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/prot.h"
#include "libc/errno.h"
#include "libc/thread/create.h"
static cthread_t _thread_allocate(const cthread_attr_t* attr) {
size_t stacksize = attr->stacksize;
size_t guardsize = attr->guardsize;
// FIXME: properly count TLS size
size_t tlssize = 0;
size_t totalsize = 3*guardsize + stacksize + tlssize + sizeof(struct cthread_descriptor_t);
totalsize = (totalsize + PAGESIZE-1) & -PAGESIZE;
uintptr_t mem = (uintptr_t)mmap(NULL, totalsize, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
size_t totalsize =
3 * guardsize + stacksize + tlssize + sizeof(struct cthread_descriptor_t);
totalsize = (totalsize + PAGESIZE - 1) & -PAGESIZE;
uintptr_t mem = (uintptr_t)mmap(NULL, totalsize, PROT_NONE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (mem == -1) return NULL;
void* alloc_bottom = (void*) mem;
void* alloc_bottom = (void*)mem;
void* stack_bottom = (void*)(mem + guardsize);
void* stack_top = (void*)(mem + guardsize + stacksize);
void* tls_bottom = (void*)(mem + guardsize + stacksize + guardsize);
void* tls_top = (void*)(mem + totalsize - guardsize);
void* alloc_top = (void*)(mem + totalsize);
if (mprotect(stack_bottom, (uintptr_t)stack_top - (uintptr_t)stack_bottom, PROT_READ | PROT_WRITE) != 0 ||
mprotect(tls_bottom, (uintptr_t)tls_top - (uintptr_t)tls_bottom, PROT_READ | PROT_WRITE) != 0) {
if (mprotect(stack_bottom, (uintptr_t)stack_top - (uintptr_t)stack_bottom,
PROT_READ | PROT_WRITE) != 0 ||
mprotect(tls_bottom, (uintptr_t)tls_top - (uintptr_t)tls_bottom,
PROT_READ | PROT_WRITE) != 0) {
munmap(alloc_bottom, totalsize);
return NULL;
}
cthread_t td = (cthread_t)tls_top - 1;
td->self = td;
td->stack.top = stack_top;
@@ -59,45 +62,48 @@ static cthread_t _thread_allocate(const cthread_attr_t* attr) {
td->tls.bottom = tls_bottom;
td->alloc.top = alloc_top;
td->alloc.bottom = alloc_bottom;
td->state = (attr->mode & CTHREAD_CREATE_DETACHED) ? cthread_detached : cthread_started;
td->state = (attr->mode & CTHREAD_CREATE_DETACHED) ? cthread_detached
: cthread_started;
return td;
}
int cthread_create(cthread_t*restrict p, const cthread_attr_t*restrict attr, int (*func)(void*), void*restrict arg) {
extern wontreturn void _thread_run(int(*func)(void*), void* arg);
int cthread_create(cthread_t* restrict p, const cthread_attr_t* restrict attr,
int (*func)(void*), void* restrict arg) {
extern wontreturn void _thread_run(int (*func)(void*), void* arg);
cthread_attr_t default_attr;
cthread_attr_init(&default_attr);
cthread_t td = _thread_allocate(attr ? attr : &default_attr);
cthread_attr_destroy(&default_attr);
if (!td) return errno;
*p = td;
register cthread_t td_ asm("r8") = td;
register int* ptid_ asm("rdx") = &td->tid;
register int* ctid_ asm("r10") = &td->tid;
register int(*func_)(void*) asm("r12") = func;
register int (*func_)(void*) asm("r12") = func;
register void* arg_ asm("r13") = arg;
long flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_PARENT | CLONE_THREAD | /*CLONE_IO |*/ CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
long flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_PARENT | CLONE_THREAD | /*CLONE_IO |*/ CLONE_SETTLS |
CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
int rc;
// asm ensures the (empty) stack of the child thread is not used
asm volatile(
"syscall\n\t" // clone
"test\t%0, %0\n\t" // if not child
"jne\t.L.cthread_create.%=\n\t" // jump to `parent` label
"xor\t%%rbp, %%rbp\n\t" // reset stack frame pointer
"mov\t%2, %%rdi\n\t"
"call\t*%1\n\t" // call `func(arg)`
"mov\t%%rax, %%rdi\n\t"
"jmp\tcthread_exit\n" // exit thread
".L.cthread_create.%=:"
: "=a"(rc)
: "r"(func_), "r"(arg_), "0"(__NR_clone), "D"(flags), "S"(td->stack.top), "r"(ptid_), "r"(ctid_), "r"(td_)
: "rcx", "r11", "cc", "memory"
);
asm volatile("syscall\n\t" // clone
"test\t%0, %0\n\t" // if not child
"jne\t.L.cthread_create.%=\n\t" // jump to `parent` label
"xor\t%%rbp, %%rbp\n\t" // reset stack frame pointer
"mov\t%2, %%rdi\n\t"
"call\t*%1\n\t" // call `func(arg)`
"mov\t%%rax, %%rdi\n\t"
"jmp\tcthread_exit\n" // exit thread
".L.cthread_create.%=:"
: "=a"(rc)
: "r"(func_), "r"(arg_), "0"(__NR_clone), "D"(flags),
"S"(td->stack.top), "r"(ptid_), "r"(ctid_), "r"(td_)
: "rcx", "r11", "cc", "memory");
if (__builtin_expect(rc < 0, 0)) {
// `clone` has failed. The thread must be deallocated.
size_t size = (intptr_t)(td->alloc.top) - (intptr_t)(td->alloc.bottom);

View file

@@ -9,9 +9,9 @@ COSMOPOLITAN_C_START_
* @fileoverview Create a cosmopolitan thread
*/
int cthread_create(cthread_t*restrict, const cthread_attr_t*restrict, int (*)(void*), void*restrict);
int cthread_create(cthread_t* restrict, const cthread_attr_t* restrict,
int (*)(void*), void* restrict);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_THREAD_CREATE_H_ */
#endif /* COSMOPOLITAN_LIBC_THREAD_CREATE_H_ */

View file

@@ -14,9 +14,8 @@ enum cthread_state {
cthread_detached = 4,
};
struct cthread_descriptor_t {
struct cthread_descriptor_t* self; // mandatory for TLS
struct cthread_descriptor_t* self; /* mandatory for TLS */
struct {
void *top, *bottom;
} stack, tls, alloc;

View file

@@ -16,13 +16,16 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/thread/detach.h"
#include "libc/thread/descriptor.h"
#include "libc/runtime/runtime.h"
#include "libc/thread/descriptor.h"
#include "libc/thread/detach.h"
int cthread_detach(cthread_t td) {
int state;
asm volatile("lock xadd\t%1, %0" : "+m"(td->state), "=r"(state) : "1"(cthread_detached) : "cc");
asm volatile("lock xadd\t%1, %0"
: "+m"(td->state), "=r"(state)
: "1"(cthread_detached)
: "cc");
if ((state & cthread_finished)) {
size_t size = (intptr_t)(td->alloc.top) - (intptr_t)(td->alloc.bottom);
munmap(td->alloc.bottom, size);

View file

@@ -16,29 +16,28 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/sysv/consts/nr.h"
#include "libc/thread/descriptor.h"
#include "libc/thread/exit.h"
#include "libc/thread/self.h"
#include "libc/thread/descriptor.h"
#include "libc/sysv/consts/nr.h"
wontreturn void cthread_exit(int rc) {
cthread_t td = cthread_self();
td->rc = rc;
size_t size = (intptr_t)(td->alloc.top) - (intptr_t)(td->alloc.bottom);
int state;
asm volatile(
"lock xadd\t%1, %0\n\t" // mark thread as finished
"test\t%2, %b1\n\t" // test if thread was detached
"jz .L.cthread_exit.%=\n\t" // skip unmap if not detached
"syscall\n" // unmap thread
".L.cthread_exit.%=:\n\t"
"mov\t%%rbx, %%rdi\n\t" //rc
"mov\t$60, %%rax\n\t"
"syscall" // thread exit
: "+m"(td->state), "=&r"(state)
: "I"(cthread_detached), "1"(cthread_finished), "a"(__NR_munmap), "b"(rc), "D"(td->alloc.bottom), "S"(size)
: "rcx", "r11", "cc", "memory"
);
asm volatile("lock xadd\t%1, %0\n\t" // mark thread as finished
"test\t%2, %b1\n\t" // test if thread was detached
"jz .L.cthread_exit.%=\n\t" // skip unmap if not detached
"syscall\n" // unmap thread
".L.cthread_exit.%=:\n\t"
"mov\t%%rbx, %%rdi\n\t" // rc
"mov\t$60, %%rax\n\t"
"syscall" // thread exit
: "+m"(td->state), "=&r"(state)
: "I"(cthread_detached), "1"(cthread_finished), "a"(__NR_munmap),
"b"(rc), "D"(td->alloc.bottom), "S"(size)
: "rcx", "r11", "cc", "memory");
unreachable;
}

View file

@@ -16,33 +16,35 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/thread/join.h"
#include "libc/thread/descriptor.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
#include "libc/thread/descriptor.h"
#include "libc/thread/join.h"
int cthread_join(cthread_t td, int* rc) {
int tid = td->tid; // tid must be loaded before lock xadd
int tid = td->tid; // tid must be loaded before lock xadd
// otherwise, tid could be set to 0 even though `state` is not finished
// mark thread as joining
int state;
asm volatile("lock xadd\t%1, %0" : "+m"(td->state), "=r"(state) : "1"(cthread_joining) : "cc");
asm volatile("lock xadd\t%1, %0"
: "+m"(td->state), "=r"(state)
: "1"(cthread_joining)
: "cc");
if (!(state & cthread_finished)) {
int flags = FUTEX_WAIT; // PRIVATE makes it hang
int flags = FUTEX_WAIT; // PRIVATE makes it hang
register struct timespec* timeout asm("r10") = NULL;
asm volatile (
"syscall"
:
: "a"(__NR_futex), "D"(&td->tid), "S"(flags), "d"(tid), "r"(timeout)
: "rcx", "r11", "cc", "memory"
);
asm volatile("syscall"
: /* no outputs */
: "a"(__NR_futex), "D"(&td->tid), "S"(flags), "d"(tid),
"r"(timeout)
: "rcx", "r11", "cc", "memory");
}
*rc = td->rc;
size_t size = (intptr_t)(td->alloc.top) - (intptr_t)(td->alloc.bottom);
munmap(td->alloc.bottom, size);
return 0;

View file

@@ -16,11 +16,11 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/atomic.h"
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
#include "libc/thread/nativesem.h"
#include "libc/thread/yield.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/futex.h"
#include "libc/bits/atomic.h"
#define CTHREAD_THREAD_VAL_BITS 32
@@ -35,78 +35,98 @@ int cthread_native_sem_destroy(cthread_native_sem_t* sem) {
int cthread_native_sem_signal(cthread_native_sem_t* sem) {
uint64_t count;
asm volatile("lock xadd\t%1, %0" : "+m"(sem->linux.count), "=r"(count) : "1"(1) : "cc");
asm volatile("lock xadd\t%1, %0"
: "+m"(sem->linux.count), "=r"(count)
: "1"(1)
: "cc");
if ((count >> CTHREAD_THREAD_VAL_BITS)) {
int flags = FUTEX_WAKE;
// WARNING: an offset of 4 bytes would be required on little-endian archs
void* wait_address = &sem->linux.count;
asm volatile (
"syscall"
:
: "a"(__NR_futex), "D"(wait_address), "S"(flags), "d"(1)
: "rcx", "r11", "cc", "memory"
);
asm volatile("syscall"
: /* no outputs */
: "a"(__NR_futex), "D"(wait_address), "S"(flags), "d"(1)
: "rcx", "r11", "cc", "memory");
}
return 0;
}
int cthread_native_sem_wait_slow(cthread_native_sem_t* sem, const struct timespec* timeout) {
int cthread_native_sem_wait_slow(cthread_native_sem_t* sem,
const struct timespec* timeout) {
uint64_t count;
// record current thread as waiter
asm volatile("lock xadd\t%1, %0" : "+m"(sem->linux.count), "=r"(count) : "1"((uint64_t)1 << CTHREAD_THREAD_VAL_BITS) : "cc");
asm volatile("lock xadd\t%1, %0"
: "+m"(sem->linux.count), "=r"(count)
: "1"((uint64_t)1 << CTHREAD_THREAD_VAL_BITS)
: "cc");
for (;;) {
// try to acquire the semaphore, as well as remove itself from waiters
if ((uint32_t)count > 0 && atomic_compare_exchange_weak(&sem->linux.count, count, count - 1 - ((uint64_t)1 << CTHREAD_THREAD_VAL_BITS))) break;
if ((uint32_t)count > 0 &&
atomic_compare_exchange_weak(
&sem->linux.count, count,
count - 1 - ((uint64_t)1 << CTHREAD_THREAD_VAL_BITS)))
break;
int flags = FUTEX_WAIT;
register struct timespec* timeout_ asm("r10") = timeout;
// WARNING: an offset of 4 bytes would be required on little-endian archs
void* wait_address = &sem->linux.count;
asm volatile (
"syscall"
:
: "a"(__NR_futex), "D"(wait_address), "S"(flags), "d"(count), "r"(timeout_)
: "rcx", "r11", "cc", "memory"
);
asm volatile("syscall"
: /* no outputs */
: "a"(__NR_futex), "D"(wait_address), "S"(flags), "d"(count),
"r"(timeout_)
: "rcx", "r11", "cc", "memory");
count = atomic_load(&sem->linux.count);
}
return 0;
}
int cthread_native_sem_wait_spin_yield(cthread_native_sem_t* sem, uint64_t count, int yield, const struct timespec* timeout) {
int cthread_native_sem_wait_spin_yield(cthread_native_sem_t* sem,
uint64_t count, int yield,
const struct timespec* timeout) {
// spin on yield
while (yield-- > 0) {
if ((count >> CTHREAD_THREAD_VAL_BITS) != 0) break; // a thread is already waiting in queue
if ((uint32_t)count > 0 && atomic_compare_exchange_weak(&sem->linux.count, count, count-1)) return 0;
if ((count >> CTHREAD_THREAD_VAL_BITS) != 0)
break; // a thread is already waiting in queue
if ((uint32_t)count > 0 &&
atomic_compare_exchange_weak(&sem->linux.count, count, count - 1))
return 0;
cthread_yield();
}
return cthread_native_sem_wait_slow(sem, timeout);
}
int cthread_native_sem_wait_spin(cthread_native_sem_t* sem, uint64_t count, int spin, int yield, const struct timespec* timeout) {
int cthread_native_sem_wait_spin(cthread_native_sem_t* sem, uint64_t count,
int spin, int yield,
const struct timespec* timeout) {
// spin on pause
while (spin-- > 0) {
if ((count >> CTHREAD_THREAD_VAL_BITS) != 0) break;
if ((uint32_t)count > 0 && atomic_compare_exchange_weak(&sem->linux.count, count, count-1)) return 0;
asm volatile ("pause");
if ((uint32_t)count > 0 &&
atomic_compare_exchange_weak(&sem->linux.count, count, count - 1))
return 0;
asm volatile("pause");
}
return cthread_native_sem_wait_spin_yield(sem, count, yield, timeout);
}
int cthread_native_sem_wait(cthread_native_sem_t* sem, int spin, int yield, const struct timespec* timeout) {
int cthread_native_sem_wait(cthread_native_sem_t* sem, int spin, int yield,
const struct timespec* timeout) {
uint64_t count = atomic_load(&sem->linux.count);
// uncontended
if ((count >> 32) == 0 && (uint32_t)count > 0 && atomic_compare_exchange_weak(&sem->linux.count, count, count-1)) return 0;
if ((count >> 32) == 0 && (uint32_t)count > 0 &&
atomic_compare_exchange_weak(&sem->linux.count, count, count - 1))
return 0;
return cthread_native_sem_wait_spin(sem, count, spin, yield, timeout);
}

View file

@@ -8,13 +8,16 @@ COSMOPOLITAN_C_START_
* @fileoverview get the thread descriptor of the current thread
*/
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
inline cthread_t cthread_self(void) {
cthread_t self;
asm ("mov %%fs:0, %0" : "=r"(self));
asm("mov\t%%fs:0,%0" : "=r"(self));
return self;
}
#else
cthread_t cthread_self(void);
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_THREAD_SELF_H_ */
#endif /* COSMOPOLITAN_LIBC_THREAD_SELF_H_ */

View file

@@ -16,8 +16,8 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/thread/yield.h"
#include "libc/calls/calls.h"
#include "libc/thread/yield.h"
int cthread_yield(void) {
return sched_yield();