Reduce stack virtual memory consumption on Linux

This commit is contained in:
Justine Tunney 2024-12-25 19:43:43 -08:00
parent cc8a9eb93c
commit 36e5861b0c
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
31 changed files with 583 additions and 166 deletions

View file

@ -337,7 +337,7 @@ int main(int argc, char *argv[]) {
sigaddset(&block, SIGQUIT); sigaddset(&block, SIGQUIT);
pthread_attr_t attr; pthread_attr_t attr;
unassert(!pthread_attr_init(&attr)); unassert(!pthread_attr_init(&attr));
unassert(!pthread_attr_setstacksize(&attr, 65536)); unassert(!pthread_attr_setstacksize(&attr, 65536 - getpagesize()));
unassert(!pthread_attr_setguardsize(&attr, getpagesize())); unassert(!pthread_attr_setguardsize(&attr, getpagesize()));
unassert(!pthread_attr_setsigmask_np(&attr, &block)); unassert(!pthread_attr_setsigmask_np(&attr, &block));
unassert(!pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 0)); unassert(!pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 0));

View file

@ -7,9 +7,13 @@
http://creativecommons.org/publicdomain/zero/1.0/ │ http://creativecommons.org/publicdomain/zero/1.0/ │
*/ */
#endif #endif
#include "libc/dce.h"
#include "libc/intrin/maps.h"
#include "libc/mem/alg.h" #include "libc/mem/alg.h"
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/runtime/winargs.internal.h"
#include "libc/stdio/stdio.h" #include "libc/stdio/stdio.h"
#include "libc/x/xasprintf.h" #include "libc/x/xasprintf.h"
@ -67,8 +71,18 @@ int main(int argc, char *argv[]) {
Append((uintptr_t)&__auxv[i + 1], Append((uintptr_t)&__auxv[i + 1],
xasprintf("&auxv[%d] = %#lx", i + 1, __auxv[i + 1])); xasprintf("&auxv[%d] = %#lx", i + 1, __auxv[i + 1]));
} }
if (!IsWindows()) {
struct AddrSize stak = __get_main_stack();
Append((intptr_t)stak.addr + stak.size, "top of stack");
Append((intptr_t)stak.addr, "bottom of stack");
} else {
#ifdef __x86_64__
Append(GetStaticStackAddr(0) + GetStaticStackSize(), "top of stack");
Append(GetStaticStackAddr(0) + GetGuardSize(), "bottom of stack");
Append(GetStaticStackAddr(0), "bottom of guard region");
#endif
}
qsort(things.p, things.n, sizeof(*things.p), Compare); qsort(things.p, things.n, sizeof(*things.p), Compare);
for (int i = 0; i < things.n; ++i) { for (int i = 0; i < things.n; ++i)
printf("%012lx %s\n", things.p[i].i, things.p[i].s); printf("%012lx %s\n", things.p[i].i, things.p[i].s);
} }
}

17
examples/thread.c Normal file
View file

@ -0,0 +1,17 @@
#include <pthread.h>
#include <stdio.h>
// how to spawn a thread
// Thread entry point: announces the argument it was started with and
// hands back a fixed sentinel pointer for the joiner to collect.
void *my_thread(void *arg) {
  void *result = (void *)0x456L;
  printf("my_thread(%p) is running\n", arg);
  return result;
}
// Spawns my_thread() with a sample argument, waits for it to finish,
// and prints the pointer it returned.
//
// pthread_create() and pthread_join() report failure through their
// return value (an error number) rather than errno, so each result is
// checked; otherwise a failed spawn would join an indeterminate thread
// handle and print an uninitialized result.
//
// @return 0 on success, or 1 if the thread couldn't be created or joined
int main(int argc, char *argv[]) {
  int err;
  void *res = 0;
  pthread_t th;
  if ((err = pthread_create(&th, 0, my_thread, (void *)0x123L))) {
    fprintf(stderr, "pthread_create() failed with error %d\n", err);
    return 1;
  }
  if ((err = pthread_join(th, &res))) {
    fprintf(stderr, "pthread_join() failed with error %d\n", err);
    return 1;
  }
  printf("my_thread() returned %p\n", res);
  return 0;
}

View file

@ -21,6 +21,7 @@
#include "libc/calls/syscall-sysv.internal.h" #include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/intrin/describeflags.h" #include "libc/intrin/describeflags.h"
#include "libc/intrin/rlimit.h"
#include "libc/intrin/strace.h" #include "libc/intrin/strace.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h" #include "libc/runtime/stack.h"
@ -47,8 +48,7 @@ int getrlimit(int resource, struct rlimit *rlim) {
} else if (!IsWindows()) { } else if (!IsWindows()) {
rc = sys_getrlimit(resource, rlim); rc = sys_getrlimit(resource, rlim);
} else if (resource == RLIMIT_STACK) { } else if (resource == RLIMIT_STACK) {
rlim->rlim_cur = GetStaticStackSize(); *rlim = __rlimit_stack_get();
rlim->rlim_max = GetStaticStackSize();
rc = 0; rc = 0;
} else if (resource == RLIMIT_AS) { } else if (resource == RLIMIT_AS) {
rlim->rlim_cur = __virtualmax; rlim->rlim_cur = __virtualmax;

View file

@ -23,6 +23,7 @@
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/intrin/describeflags.h" #include "libc/intrin/describeflags.h"
#include "libc/intrin/rlimit.h"
#include "libc/intrin/strace.h" #include "libc/intrin/strace.h"
#include "libc/macros.h" #include "libc/macros.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
@ -88,10 +89,12 @@ int setrlimit(int resource, const struct rlimit *rlim) {
} else if (!IsWindows() && !(IsNetbsd() && resource == RLIMIT_AS)) { } else if (!IsWindows() && !(IsNetbsd() && resource == RLIMIT_AS)) {
rc = sys_setrlimit(resource, rlim); rc = sys_setrlimit(resource, rlim);
} else if (resource == RLIMIT_STACK) { } else if (resource == RLIMIT_STACK) {
rc = enotsup(); rc = 0;
} else { } else {
rc = einval(); rc = einval();
} }
if (!rc && resource == RLIMIT_STACK)
__rlimit_stack_set(*rlim); // so __rlimit_stack_get() works on all OSes
if (resource == RLIMIT_AS) { if (resource == RLIMIT_AS) {
__virtualmax = rlim->rlim_cur; __virtualmax = rlim->rlim_cur;
errno = olde; errno = olde;

View file

@ -25,8 +25,8 @@ int cosmo_futex_wake(_COSMO_ATOMIC(int) *, int, char);
int cosmo_futex_wait(_COSMO_ATOMIC(int) *, int, char, int, int cosmo_futex_wait(_COSMO_ATOMIC(int) *, int, char, int,
const struct timespec *); const struct timespec *);
errno_t cosmo_stack_alloc(unsigned *, unsigned *, void **) libcesque; errno_t cosmo_stack_alloc(size_t *, size_t *, void **) libcesque;
errno_t cosmo_stack_free(void *, unsigned, unsigned) libcesque; errno_t cosmo_stack_free(void *, size_t, size_t) libcesque;
void cosmo_stack_clear(void) libcesque; void cosmo_stack_clear(void) libcesque;
void cosmo_stack_setmaxstacks(int) libcesque; void cosmo_stack_setmaxstacks(int) libcesque;
int cosmo_stack_getmaxstacks(void) libcesque; int cosmo_stack_getmaxstacks(void) libcesque;

View file

@ -16,12 +16,15 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/dce.h"
#include "libc/intrin/describeflags.h" #include "libc/intrin/describeflags.h"
#include "libc/macros.h" #include "libc/macros.h"
#include "libc/nt/enum/consolemodeflags.h" #include "libc/nt/enum/consolemodeflags.h"
#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/prot.h"
#define MAP_GROWSDOWN_LINUX 0x00000100
const char *_DescribeMapFlags(char buf[64], int x) { const char *_DescribeMapFlags(char buf[64], int x) {
const struct DescribeFlags kMapFlags[] = { const struct DescribeFlags kMapFlags[] = {
{MAP_PRIVATE, "PRIVATE"}, // {MAP_PRIVATE, "PRIVATE"}, //
@ -35,6 +38,7 @@ const char *_DescribeMapFlags(char buf[64], int x) {
{MAP_NORESERVE, "NORESERVE"}, // {MAP_NORESERVE, "NORESERVE"}, //
{MAP_NONBLOCK, "NONBLOCK"}, // {MAP_NONBLOCK, "NONBLOCK"}, //
{MAP_POPULATE, "POPULATE"}, // {MAP_POPULATE, "POPULATE"}, //
{IsLinux() ? MAP_GROWSDOWN_LINUX : 0, "GROWSDOWN"}, //
}; };
return _DescribeFlags(buf, 64, kMapFlags, ARRAYLEN(kMapFlags), "MAP_", x); return _DescribeFlags(buf, 64, kMapFlags, ARRAYLEN(kMapFlags), "MAP_", x);
} }

View file

@ -21,6 +21,8 @@
#include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/prot.h"
const char *_DescribeProtFlags(char buf[48], int x) { const char *_DescribeProtFlags(char buf[48], int x) {
if (!x)
return "PROT_NONE";
const struct DescribeFlags kProtFlags[] = { const struct DescribeFlags kProtFlags[] = {
{PROT_READ, "READ"}, // {PROT_READ, "READ"}, //
{PROT_WRITE, "WRITE"}, // {PROT_WRITE, "WRITE"}, //

View file

@ -17,16 +17,13 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/calls/struct/rlimit.h" #include "libc/calls/struct/rlimit.h"
#include "libc/calls/struct/rlimit.internal.h"
#include "libc/dce.h"
#include "libc/intrin/getauxval.h" #include "libc/intrin/getauxval.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/maps.h" #include "libc/intrin/maps.h"
#include "libc/intrin/rlimit.h"
#include "libc/macros.h" #include "libc/macros.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/stdio/sysparam.h"
#include "libc/sysv/consts/auxv.h" #include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/rlim.h"
#include "libc/sysv/consts/rlimit.h"
// Hack for guessing boundaries of _start()'s stack // Hack for guessing boundaries of _start()'s stack
// //
@ -91,12 +88,9 @@ static uintptr_t __get_main_top(int pagesz) {
} }
static size_t __get_stack_size(int pagesz, uintptr_t start, uintptr_t top) { static size_t __get_stack_size(int pagesz, uintptr_t start, uintptr_t top) {
size_t size, max = 8 * 1024 * 1024; size_t stacksz = __rlimit_stack_get().rlim_cur;
struct rlimit rlim = {RLIM_INFINITY}; stacksz = MIN(stacksz, 1024ul * 1024 * 1024 * 1024);
sys_getrlimit(RLIMIT_STACK, &rlim); return MAX(ROUNDDOWN(stacksz, pagesz), ROUNDUP(top - start, pagesz));
if ((size = rlim.rlim_cur) > max)
size = max;
return MAX(ROUNDUP(size, pagesz), ROUNDUP(top - start, pagesz));
} }
/** /**

50
libc/intrin/lockless.h Normal file
View file

@ -0,0 +1,50 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_LOCKLESS_H_
#define COSMOPOLITAN_LIBC_INTRIN_LOCKLESS_H_
#include "libc/atomic.h"
#include "libc/intrin/atomic.h"
COSMOPOLITAN_C_START_
// lockless memory transactions
//
// - one writer
// - many readers
// - generation is monotonic
// - even numbers mean memory is ready
// - odd numbers mean memory is actively being changed
// - always use acquire semantics inside your read transaction
//
// let's say you want to be able to atomically read and write to 128-bit
// values, but you've only got a 64-bit system. if you expect that it'll
// be frequently written, then you should use a mutex. but if you expect it
// to be frequently read and rarely written, then it's possible to do it
// without a mutex; in fact you don't even need the x86 lock instruction
// prefix; all that is required is a series of carefully ordered mov ops
// which are designed to exploit the strong ordering of the architecture
// Opens a write transaction by publishing `generation + 1`.
//
// @param genptr is the generation counter shared with readers
// @return generation observed on entry, to pass to lockless_write_end()
static inline unsigned lockless_write_begin(atomic_uint* genptr) {
  const unsigned entry_gen = atomic_load_explicit(genptr, memory_order_acquire);
  const unsigned busy_gen = entry_gen + 1;
  atomic_store_explicit(genptr, busy_gen, memory_order_release);
  return entry_gen;
}
// Closes a write transaction by publishing `gen + 2`.
//
// @param genptr is the generation counter shared with readers
// @param gen is the value lockless_write_begin() returned
static inline void lockless_write_end(atomic_uint* genptr, unsigned gen) {
  const unsigned done_gen = gen + 2;
  atomic_store_explicit(genptr, done_gen, memory_order_release);
}
// Opens a read transaction.
//
// @param genptr is the generation counter shared with the writer
// @return current generation, to be validated by lockless_read_end()
static inline unsigned lockless_read_begin(atomic_uint* genptr) {
  unsigned snapshot = atomic_load_explicit(genptr, memory_order_acquire);
  return snapshot;
}
// Closes a read transaction and reports whether what was read is usable.
//
// The read is good only when the generation the reader started with is
// even and still matches the current generation. Otherwise `*want` is
// replaced with the current generation so the caller can redo its reads
// and call this function again.
//
// @param genptr is the generation counter shared with the writer
// @param want points to the generation the reader started with
// @return true if the reads may be trusted, or false to retry
static inline bool lockless_read_end(atomic_uint* genptr, unsigned* want) {
  const unsigned expected = *want;
  const unsigned observed = atomic_load_explicit(genptr, memory_order_acquire);
  if (!(expected & 1) && expected == observed)
    return true;
  *want = observed;
  return false;
}
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_LOCKLESS_H_ */

View file

@ -57,7 +57,8 @@ void *__maps_randaddr(void);
void __maps_add(struct Map *); void __maps_add(struct Map *);
void __maps_free(struct Map *); void __maps_free(struct Map *);
void __maps_insert(struct Map *); void __maps_insert(struct Map *);
bool __maps_track(char *, size_t); int __maps_untrack(char *, size_t);
bool __maps_track(char *, size_t, int, int);
struct Map *__maps_alloc(void); struct Map *__maps_alloc(void);
struct Map *__maps_floor(const char *); struct Map *__maps_floor(const char *);
void __maps_stack(char *, int, int, size_t, int, intptr_t); void __maps_stack(char *, int, int, size_t, int, intptr_t);
@ -78,6 +79,13 @@ static inline struct Map *__maps_next(struct Map *map) {
return 0; return 0;
} }
// Returns the mapping that tree_prev() places before `map`, or null if
// there isn't one.
static inline struct Map *__maps_prev(struct Map *map) {
  struct Tree *node = tree_prev(&map->tree);
  if (!node)
    return 0;
  return MAP_TREE_CONTAINER(node);
}
static inline struct Map *__maps_first(void) { static inline struct Map *__maps_first(void) {
struct Tree *node; struct Tree *node;
if ((node = tree_first(__maps.maps))) if ((node = tree_first(__maps.maps)))

View file

@ -85,7 +85,8 @@ privileged optimizespeed struct Map *__maps_floor(const char *addr) {
return 0; return 0;
} }
static bool __maps_overlaps(const char *addr, size_t size, int pagesz) { static bool __maps_overlaps(const char *addr, size_t size) {
int pagesz = __pagesize;
struct Map *map, *floor = __maps_floor(addr); struct Map *map, *floor = __maps_floor(addr);
for (map = floor; map && map->addr <= addr + size; map = __maps_next(map)) for (map = floor; map && map->addr <= addr + size; map = __maps_next(map))
if (MAX(addr, map->addr) < if (MAX(addr, map->addr) <
@ -305,27 +306,39 @@ void __maps_insert(struct Map *map) {
} }
static void __maps_track_insert(struct Map *map, char *addr, size_t size, static void __maps_track_insert(struct Map *map, char *addr, size_t size,
uintptr_t map_handle) { uintptr_t map_handle, int prot, int flags) {
map->addr = addr; map->addr = addr;
map->size = size; map->size = size;
map->prot = PROT_READ | PROT_WRITE; map->prot = prot;
map->flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK; map->flags = flags;
map->hand = map_handle; map->hand = map_handle;
__maps_lock(); __maps_lock();
ASSERT(!__maps_overlaps(addr, size));
__maps_insert(map); __maps_insert(map);
__maps_unlock(); __maps_unlock();
} }
bool __maps_track(char *addr, size_t size) { // adds interval to rbtree (no sys_mmap)
bool __maps_track(char *addr, size_t size, int prot, int flags) {
struct Map *map; struct Map *map;
do { do {
if (!(map = __maps_alloc())) if (!(map = __maps_alloc()))
return false; return false;
} while (map == MAPS_RETRY); } while (map == MAPS_RETRY);
__maps_track_insert(map, addr, size, -1); __maps_track_insert(map, addr, size, -1, prot, flags);
return true; return true;
} }
// Forgets an address interval in the mappings tree without asking the
// system to unmap anything (no sys_munmap).
//
// Nodes removed by __muntrack() are collected while the maps lock is
// held and only released after the lock has been dropped.
//
// @return whatever __muntrack() returned
int __maps_untrack(char *addr, size_t size) {
  int rc;
  struct Map *removed = 0;
  __maps_lock();
  rc = __muntrack(addr, size, __pagesize, &removed);
  __maps_unlock();
  __maps_free_all(removed);
  return rc;
}
struct Map *__maps_alloc(void) { struct Map *__maps_alloc(void) {
struct Map *map; struct Map *map;
uintptr_t tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed); uintptr_t tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed);
@ -342,7 +355,9 @@ struct Map *__maps_alloc(void) {
if (sys.addr == MAP_FAILED) if (sys.addr == MAP_FAILED)
return 0; return 0;
map = sys.addr; map = sys.addr;
__maps_track_insert(map, sys.addr, gransz, sys.maphandle); __maps_track_insert(map, sys.addr, gransz, sys.maphandle,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK);
for (int i = 1; i < gransz / sizeof(struct Map); ++i) for (int i = 1; i < gransz / sizeof(struct Map); ++i)
__maps_free(map + i); __maps_free(map + i);
return MAPS_RETRY; return MAPS_RETRY;
@ -370,7 +385,7 @@ static int __munmap(char *addr, size_t size) {
size_t pgup_size = (size + pagesz - 1) & -pagesz; size_t pgup_size = (size + pagesz - 1) & -pagesz;
size_t grup_size = (size + gransz - 1) & -gransz; size_t grup_size = (size + gransz - 1) & -gransz;
if (grup_size > pgup_size) if (grup_size > pgup_size)
if (__maps_overlaps(addr + pgup_size, grup_size - pgup_size, pagesz)) { if (__maps_overlaps(addr + pgup_size, grup_size - pgup_size)) {
__maps_unlock(); __maps_unlock();
return einval(); return einval();
} }
@ -420,7 +435,7 @@ static void *__maps_pickaddr(size_t size) {
__maps.pick = 0; __maps.pick = 0;
if (!addr) if (!addr)
addr = __maps_randaddr(); addr = __maps_randaddr();
if (!__maps_overlaps(addr, size, __pagesize)) { if (!__maps_overlaps(addr, size)) {
__maps.pick = addr + ((size + __gransize - 1) & -__gransize); __maps.pick = addr + ((size + __gransize - 1) & -__gransize);
__maps_unlock(); __maps_unlock();
return addr; return addr;
@ -455,7 +470,7 @@ static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
sysflags |= MAP_FIXED_NOREPLACE_linux; sysflags |= MAP_FIXED_NOREPLACE_linux;
} else if (IsFreebsd() || IsNetbsd()) { } else if (IsFreebsd() || IsNetbsd()) {
sysflags |= MAP_FIXED; sysflags |= MAP_FIXED;
if (__maps_overlaps(addr, size, pagesz)) { if (__maps_overlaps(addr, size)) {
__maps_free(map); __maps_free(map);
return (void *)eexist(); return (void *)eexist();
} }
@ -508,11 +523,8 @@ TryAgain:
} }
// untrack mapping we blew away // untrack mapping we blew away
if (!IsWindows() && should_untrack) { if (!IsWindows() && should_untrack)
struct Map *deleted = 0; __maps_untrack(res.addr, size);
__muntrack(res.addr, size, pagesz, &deleted);
__maps_free_all(deleted);
}
// track map object // track map object
map->addr = res.addr; map->addr = res.addr;
@ -599,8 +611,8 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
size_t pgup_old_size = (old_size + pagesz - 1) & -pagesz; size_t pgup_old_size = (old_size + pagesz - 1) & -pagesz;
size_t grup_old_size = (old_size + gransz - 1) & -gransz; size_t grup_old_size = (old_size + gransz - 1) & -gransz;
if (grup_old_size > pgup_old_size) if (grup_old_size > pgup_old_size)
if (__maps_overlaps(old_addr + pgup_old_size, grup_old_size - pgup_old_size, if (__maps_overlaps(old_addr + pgup_old_size,
pagesz)) grup_old_size - pgup_old_size))
return (void *)einval(); return (void *)einval();
old_size = pgup_old_size; old_size = pgup_old_size;
@ -611,7 +623,7 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
size_t grup_new_size = (new_size + gransz - 1) & -gransz; size_t grup_new_size = (new_size + gransz - 1) & -gransz;
if (grup_new_size > pgup_new_size) if (grup_new_size > pgup_new_size)
if (__maps_overlaps(new_addr + pgup_new_size, if (__maps_overlaps(new_addr + pgup_new_size,
grup_new_size - pgup_new_size, pagesz)) grup_new_size - pgup_new_size))
return (void *)einval(); return (void *)einval();
} }

10
libc/intrin/rlimit.h Normal file
View file

@ -0,0 +1,10 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_RLIMIT_H_
#define COSMOPOLITAN_LIBC_INTRIN_RLIMIT_H_
#include "libc/calls/struct/rlimit.h"
COSMOPOLITAN_C_START_
// replaces the cached stack resource limit; setrlimit() calls this
// after a successful RLIMIT_STACK change
void __rlimit_stack_set(struct rlimit);
// returns the cached stack resource limit, which is loaded from the
// system the first time either of these functions is called
struct rlimit __rlimit_stack_get(void);
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_RLIMIT_H_ */

76
libc/intrin/rlimitstack.c Normal file
View file

@ -0,0 +1,76 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/atomic.h"
#include "libc/calls/struct/rlimit.h"
#include "libc/calls/struct/rlimit.internal.h"
#include "libc/cosmo.h"
#include "libc/dce.h"
#include "libc/intrin/cxaatexit.h"
#include "libc/intrin/lockless.h"
#include "libc/intrin/rlimit.h"
#include "libc/runtime/stack.h"
#include "libc/sysv/consts/rlim.h"
#include "libc/sysv/consts/rlimit.h"
// cached copy of a `struct rlimit` whose two fields can be read as a
// consistent pair without taking a lock (see libc/intrin/lockless.h)
struct atomic_rlimit {
  atomic_ulong cur;  // mirrors rlim_cur
  atomic_ulong max;  // mirrors rlim_max
  atomic_uint once;  // cosmo_once() guard for __rlimit_stack_init()
  atomic_uint gen;   // generation counter for the lockless transactions
};

// the one cached stack rlimit, private to this file
static struct atomic_rlimit __rlimit_stack;
static void __rlimit_stack_init(void) {
struct rlimit rlim;
if (IsWindows()) {
rlim.rlim_cur = GetStaticStackSize();
rlim.rlim_max = -1; // RLIM_INFINITY in consts.sh
} else {
sys_getrlimit(RLIMIT_STACK, &rlim);
}
atomic_init(&__rlimit_stack.cur, rlim.rlim_cur);
atomic_init(&__rlimit_stack.max, rlim.rlim_max);
}
// Returns the cached stack rlimit.
//
// Both fields are loaded inside a lockless read transaction, and the
// loads are repeated until lockless_read_end() accepts them, so the
// pair returned always comes from a single update.
struct rlimit __rlimit_stack_get(void) {
  unsigned long soft, hard;
  cosmo_once(&__rlimit_stack.once, __rlimit_stack_init);
  unsigned gen = lockless_read_begin(&__rlimit_stack.gen);
  for (;;) {
    soft = atomic_load_explicit(&__rlimit_stack.cur, memory_order_acquire);
    hard = atomic_load_explicit(&__rlimit_stack.max, memory_order_acquire);
    if (lockless_read_end(&__rlimit_stack.gen, &gen))
      break;
  }
  return (struct rlimit){soft, hard};
}
// Replaces the cached stack rlimit.
//
// The lockless scheme permits only one writer at a time, so the two
// stores are wrapped in a write transaction that is itself performed
// while holding __cxa_lock().
void __rlimit_stack_set(struct rlimit rlim) {
  cosmo_once(&__rlimit_stack.once, __rlimit_stack_init);
  __cxa_lock();
  unsigned long soft = rlim.rlim_cur;
  unsigned long hard = rlim.rlim_max;
  unsigned gen = lockless_write_begin(&__rlimit_stack.gen);
  atomic_store_explicit(&__rlimit_stack.cur, soft, memory_order_release);
  atomic_store_explicit(&__rlimit_stack.max, hard, memory_order_release);
  lockless_write_end(&__rlimit_stack.gen, gen);
  __cxa_unlock();
}

View file

@ -53,6 +53,8 @@
#include "libc/runtime/internal.h" #include "libc/runtime/internal.h"
#include "libc/runtime/symbols.internal.h" #include "libc/runtime/symbols.internal.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/sa.h" #include "libc/sysv/consts/sa.h"
#include "libc/sysv/consts/sicode.h" #include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/ss.h" #include "libc/sysv/consts/ss.h"
@ -680,7 +682,8 @@ textwindows dontinstrument static uint32_t __sig_worker(void *arg) {
__bootstrap_tls(&tls, __builtin_frame_address(0)); __bootstrap_tls(&tls, __builtin_frame_address(0));
char *sp = __builtin_frame_address(0); char *sp = __builtin_frame_address(0);
__maps_track((char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STKSZ, __maps_track((char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STKSZ,
STKSZ); STKSZ, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK);
for (;;) { for (;;) {
_pthread_mutex_lock(&__sig_worker_lock); _pthread_mutex_lock(&__sig_worker_lock);

View file

@ -23,9 +23,16 @@
#include "libc/calls/syscall-sysv.internal.h" #include "libc/calls/syscall-sysv.internal.h"
#include "libc/cosmo.h" #include "libc/cosmo.h"
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/dlopen/dlfcn.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/dll.h" #include "libc/intrin/dll.h"
#include "libc/intrin/maps.h"
#include "libc/intrin/rlimit.h"
#include "libc/intrin/strace.h"
#include "libc/intrin/weaken.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/sock/internal.h"
#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/prot.h"
#include "libc/thread/posixthread.internal.h" #include "libc/thread/posixthread.internal.h"
@ -35,6 +42,11 @@
* @fileoverview cosmo stack memory manager * @fileoverview cosmo stack memory manager
*/ */
#define MAP_GROWSDOWN_LINUX 0x00000100
#define MAP_ANONYMOUS_LINUX 0x00000020
#define MAP_NOREPLACE_LINUX 0x08000000
#define MAP_NORESERVE_LINUX 0x00004000
#define MAP_ANON_OPENBSD 0x1000 #define MAP_ANON_OPENBSD 0x1000
#define MAP_STACK_OPENBSD 0x4000 #define MAP_STACK_OPENBSD 0x4000
@ -43,8 +55,8 @@
struct CosmoStack { struct CosmoStack {
struct Dll elem; struct Dll elem;
void *stackaddr; void *stackaddr;
unsigned stacksize; size_t stacksize;
unsigned guardsize; size_t guardsize;
}; };
struct CosmoStacks { struct CosmoStacks {
@ -79,10 +91,133 @@ void cosmo_stack_wipe(void) {
_pthread_mutex_wipe_np(&cosmo_stacks.lock); _pthread_mutex_wipe_np(&cosmo_stacks.lock);
} }
static errno_t cosmo_stack_munmap(void *addr, size_t size) { // map_growsdown will not grow more than rlimit_stack
static size_t cosmo_stack_maxgrow(void) {
return __rlimit_stack_get().rlim_cur & -__pagesize;
}
// Allocates private anonymous memory at exactly `addr` on Linux, without
// replacing anything that might already be mapped there.
//
// NOTE(review): Linux's uapi headers give MAP_FIXED_NOREPLACE as
// 0x100000; confirm that the 0x08000000 value of MAP_NOREPLACE_LINUX is
// what the kernel expects, since an unrecognized flag would make the
// fallback below the normal path rather than the exception.
//
// @param addr is the exact address wanted
// @param size is the byte length of the mapping
// @param prot is the page protection, e.g. PROT_NONE
// @param flags has extra Linux mmap flags, e.g. MAP_GROWSDOWN_LINUX
// @return `addr` on success, or null w/ errno, which is EEXIST if the
//     kernel wouldn't or couldn't place the mapping at `addr`
static void *flixmap(void *addr, size_t size, int prot, int flags) {
  flags |= MAP_PRIVATE | MAP_ANONYMOUS_LINUX | MAP_NOREPLACE_LINUX;
  void *res = __sys_mmap(addr, size, prot, flags, -1, 0, 0);
  if (res == MAP_FAILED) {
    res = 0;
  } else if (res != addr) {
    // a kernel that doesn't honor the noreplace flag treats addr as a
    // hint and hands back a mapping somewhere else. the memory to give
    // back is that stray mapping at `res`; unmapping `addr` would
    // destroy the very mapping we promised not to clobber, and leak res
    sys_munmap(res, size);
    errno = EEXIST;  // polyfill linux 4.17+ behavior
    res = 0;
  }
  STRACE("mmap(%p, %'zu, %s, %s) → %p% m", addr, size, DescribeProtFlags(prot),
         DescribeMapFlags(flags), res);
  return res;
}
// Maps a stack on Linux, asking the kernel for only the guard region and
// the top page of the stack; that top page is mapped with
// MAP_GROWSDOWN_LINUX.
//
// The search walks the tracked mappings from 0x7fffffffffff downward,
// looking for a hole beneath a mapping that's big enough to hold
// `guardsize + stacksize` bytes. The whole range is then recorded in the
// mappings tree before the kernel is asked for the memory. Should the
// kernel report EEXIST, the search resumes with the tracker entries left
// in place.
//
// @param stacksize is the byte size of the usable stack
// @param guardsize is the byte size of the PROT_NONE region beneath it
// @return address of bottom of stack (i.e. just above the guard region)
//     or null w/ errno, which is ENOMEM if no hole could be found or
//     tracked, otherwise whatever the failed mmap reported
static void *slackmap(size_t stacksize, size_t guardsize) {
  int olde = errno;
  struct Map *prev, *map;
  char *max = (char *)0x7fffffffffff;
  size_t need = guardsize + stacksize;
  __maps_lock();
  for (;;) {

    // look for empty space beneath higher mappings
    char *region = 0;
    for (map = __maps_floor(max); map; map = prev) {
      // lowest usable address is one page above zero, or the end of
      // the mapping that sits beneath this one
      char *min = (char *)(intptr_t)__pagesize;
      if ((prev = __maps_prev(map)))
        min = prev->addr + prev->size;
      if (map->addr - min >= need) {
        region = map->addr - need;
        max = region - 1;  // a later retry searches beneath this pick
        break;
      }
    }
    if (!region)
      break;

    // track intended memory in rbtree
    if (!__maps_track(region, guardsize, PROT_NONE,
                      MAP_PRIVATE | MAP_ANONYMOUS_LINUX))
      break;
    if (!__maps_track(region + guardsize, stacksize, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS_LINUX)) {
      __maps_untrack(region, need);
      break;
    }
    __maps_unlock();

    // ask kernel to create guard region
    // taking special care to not clobber untracked mappings
    //
    // it's important that this call happen first, since it limits how
    // much memory map_growsdown will secretly consume. if there's
    // nothing beneath a map_growsdown mapping, then the kernel reserves
    // (and this isn't listed in /proc/PID/maps so don't bother looking)
    // `rlimit_stack.rlim_cur & -__pagesize` bytes of memory including
    // this top-most page, and another 1mb of guard pages beneath that.
    // but by mapping our guard pages manually, we ensure the guard
    // region and the stack itself will be exactly as big as we want.
    //
    // you'd think we could mmap(0, pagesz, growsdown) to let the kernel
    // pick an address and then we could just upscale the user's stack
    // size request to whatever rlimit_stack is if it's bigger. but the
    // linux kernel will actually choose addresses between existing maps
    // where the hole is smaller than rlimit_stack.
    //
    // to use map_growsdown, we must use map_fixed. normally when we use
    // map_fixed, we reserve an entire kernel-assigned region beforehand
    // to ensure there isn't any overlap with existing mappings. however
    // since growsdown stops growing when it encounters another mapping,
    // you can't map it on top of a reservation mapping. so we must take
    // a leap of faith there aren't any mystery mappings twixt the guard
    // region and growsdown page below.
    char *guard_region =
        flixmap(region, guardsize, PROT_NONE, MAP_NORESERVE_LINUX);
    if (!guard_region) {
    RecoverFromMmapFailure:
      if (errno != EEXIST) {
        // mmap() probably raised enomem due to rlimit_as etc.
        __maps_untrack(region, need);
        return 0;
      } else {
        // we've encountered a mystery mapping. it's hard to imagine
        // this happening, since we don't use map_growsdown when
        // cosmo_dlopen() is linked in the binary. in that case, the
        // tracker we created covers at least some of the rogue map,
        // therefore this issue should fix itself if we keep going
        errno = olde;
        __maps_lock();  // the search at the top of the loop runs locked
        ++max;
        continue;
      }
    }

    // ask kernel to create stack pages
    // taking special care to not clobber untracked mappings
    char *top_page = flixmap(region + need - __pagesize, __pagesize,
                             PROT_READ | PROT_WRITE, MAP_GROWSDOWN_LINUX);
    if (!top_page) {
      // give the guard pages back before recovering
      sys_munmap(region, guardsize);
      goto RecoverFromMmapFailure;
    }

    // return address to bottom of stack
    return region + guardsize;
  }

  // only reached by breaking out of the loop, with the maps lock held
  __maps_unlock();
  errno = ENOMEM;
  return 0;
}
static errno_t cosmo_stack_munmap(char *stackaddr, size_t stacksize,
size_t guardsize) {
errno_t r = 0; errno_t r = 0;
errno_t e = errno; errno_t e = errno;
if (!munmap(addr, size)) { if (!munmap(stackaddr - guardsize, //
guardsize + stacksize)) {
r = errno; r = errno;
errno = e; errno = e;
} }
@ -119,7 +254,8 @@ static void cosmo_stack_rehabilitate(struct Dll *stacks) {
struct Dll *e; struct Dll *e;
for (e = dll_first(stacks); e; e = dll_next(stacks, e)) for (e = dll_first(stacks); e; e = dll_next(stacks, e))
cosmo_stack_munmap(THREADSTACK_CONTAINER(e)->stackaddr, cosmo_stack_munmap(THREADSTACK_CONTAINER(e)->stackaddr,
THREADSTACK_CONTAINER(e)->stacksize); THREADSTACK_CONTAINER(e)->stacksize,
THREADSTACK_CONTAINER(e)->guardsize);
cosmo_stack_lock(); cosmo_stack_lock();
dll_make_first(&cosmo_stacks.objects, stacks); dll_make_first(&cosmo_stacks.objects, stacks);
cosmo_stack_unlock(); cosmo_stack_unlock();
@ -193,39 +329,41 @@ void cosmo_stack_setmaxstacks(int maxstacks) {
* abstract all the gory details of gaining authorized memory, and * abstract all the gory details of gaining authorized memory, and
* additionally implements caching for lightning fast performance. * additionally implements caching for lightning fast performance.
* *
* The stack size must be nonzero. It is rounded up to the granularity * The stack size must be nonzero. It specifies the minimum amount of
* of the underlying system allocator, which is normally the page size. * stack space that will be available for use. The provided value is
* Your parameter will be updated with the selected value upon success. * rounded up to the system page size. It may be increased further for
* various reasons. Your stack size parameter will be updated with the
* chosen value upon success.
* *
* The guard size specifies how much memory should be protected at the * The guard size specifies the minimum amount of memory that should be
* bottom of your stack. This is helpful for ensuring stack overflows * protected beneath your stack. This helps ensure stack overflows cause
* will result in a segmentation fault, rather than corrupting memory * a segfault rather than corrupting memory silently. This may be set to
* silently. This may be set to zero, in which case no guard pages will * zero in which case no guard pages will be made. This value is rounded
* be protected. This value is rounded up to the system page size. The * up to the system page size. The corrected value will be returned upon
* corrected value will be returned upon success. Your guard size needs * success. Your guard size needs to be small enough to leave room for
* to be small enough to leave room for at least one memory page in your * at least one memory page in your stack size i.e. `guardsize +
* stack size i.e. `guardsize + pagesize <= stacksize` must be the case. * pagesize <= stacksize` must be the case. Otherwise this function will
* Otherwise this function will return an `EINVAL` error. * return an `EINVAL` error.
* *
* When you're done using your stack, pass it to cosmo_stack_free() so * When you're done using your stack, pass it to cosmo_stack_free() so
* it can be recycled. Stacks are only recycled when the `stacksize` and * it can be recycled. Stacks are only recycled when the `stacksize` and
* `guardsize` parameters are an exact match after correction. Otherwise * `guardsize` parameters match the constraints described above. Stacks
* they'll likely be freed eventually, in a least-recently used fashion, * that don't end up getting reused will be freed eventually, in a least
* based upon the configurable cosmo_stack_setmaxstacks() setting. * recently used way based upon your cosmo_stack_setmaxstacks() setting.
* *
* This function returns 0 on success, or an errno on error. See the * This function returns 0 on success, or an errno on error. See the
* documentation of mmap() for a list possible errors that may occur. * documentation of mmap() for a list possible errors that may occur.
*/ */
errno_t cosmo_stack_alloc(unsigned *inout_stacksize, // errno_t cosmo_stack_alloc(size_t *inout_stacksize, //
unsigned *inout_guardsize, // size_t *inout_guardsize, //
void **out_addr) { void **out_addr) {
// validate arguments // validate arguments
unsigned stacksize = *inout_stacksize; size_t stacksize = *inout_stacksize;
unsigned guardsize = *inout_guardsize; size_t guardsize = *inout_guardsize;
stacksize = (stacksize + __gransize - 1) & -__gransize; stacksize = (stacksize + __pagesize - 1) & -__pagesize;
guardsize = (guardsize + __pagesize - 1) & -__pagesize; guardsize = (guardsize + __pagesize - 1) & -__pagesize;
if (guardsize + __pagesize > stacksize) if (!stacksize)
return EINVAL; return EINVAL;
// recycle stack // recycle stack
@ -236,8 +374,10 @@ errno_t cosmo_stack_alloc(unsigned *inout_stacksize, //
struct CosmoStack *ts = THREADSTACK_CONTAINER(e); struct CosmoStack *ts = THREADSTACK_CONTAINER(e);
if (ts->stacksize == stacksize && // if (ts->stacksize == stacksize && //
ts->guardsize == guardsize) { ts->guardsize == guardsize) {
dll_remove(&cosmo_stacks.stacks, e);
stackaddr = ts->stackaddr; stackaddr = ts->stackaddr;
stacksize = ts->stacksize;
guardsize = ts->guardsize;
dll_remove(&cosmo_stacks.stacks, e);
dll_make_first(&cosmo_stacks.objects, e); dll_make_first(&cosmo_stacks.objects, e);
--cosmo_stacks.count; --cosmo_stacks.count;
break; break;
@ -247,20 +387,37 @@ errno_t cosmo_stack_alloc(unsigned *inout_stacksize, //
// create stack // create stack
if (!stackaddr) { if (!stackaddr) {
errno_t e = errno; errno_t olde = errno;
stackaddr = mmap(0, stacksize, PROT_READ | PROT_WRITE, if (!IsTiny() && IsLinux() && guardsize && !_weaken(cosmo_dlopen) &&
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); stacksize <= cosmo_stack_maxgrow() && !IsQemuUser()) {
if (stackaddr == MAP_FAILED) { // this special linux-only stack allocator significantly reduces
// the consumption of virtual memory.
if (!(stackaddr = slackmap(stacksize, guardsize))) {
errno_t err = errno; errno_t err = errno;
errno = e; errno = olde;
return err; return err;
} }
} else {
char *map = mmap(0, guardsize + stacksize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (map == MAP_FAILED) {
errno_t err = errno;
errno = olde;
return err;
}
stackaddr = map + guardsize;
if (IsOpenbsd()) if (IsOpenbsd())
if (!TellOpenbsdThisIsStackMemory(stackaddr, stacksize)) if (!TellOpenbsdThisIsStackMemory(stackaddr, stacksize))
notpossible; notpossible;
if (guardsize) if (guardsize) {
if (mprotect(stackaddr, guardsize, PROT_NONE | PROT_GUARD)) if (mprotect(map, guardsize, PROT_NONE | PROT_GUARD)) {
notpossible; errno_t err = errno;
munmap(map, guardsize + stacksize);
errno = olde;
return err;
}
}
}
} }
// return stack // return stack
@ -277,20 +434,22 @@ static void cosmo_stack_setup(void) {
/** /**
* Frees stack memory. * Frees stack memory.
* *
* While not strictly required, it's assumed these three values would be * While not strictly required, it's assumed the three parameters are
* those returned by an earlier call to cosmo_stack_alloc(). * those returned by an earlier call to cosmo_stack_alloc(). If they
* aren't page aligned and rounded, this function will return EINVAL.
* *
* This function returns 0 on success, or an errno on error. The `errno` * This function returns 0 on success, or an errno on error. The `errno`
* variable is never clobbered. You can only dependably count on this to * variable is never clobbered. You can only dependably count on this to
* return an error on failure when you say `cosmo_stack_setmaxstacks(0)` * return an error on failure when you say `cosmo_stack_setmaxstacks(0)`
*/ */
errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize, errno_t cosmo_stack_free(void *stackaddr, size_t stacksize, size_t guardsize) {
unsigned guardsize) { if (!stacksize)
stacksize = (stacksize + __gransize - 1) & -__gransize;
guardsize = (guardsize + __pagesize - 1) & -__pagesize;
if (guardsize + __pagesize > stacksize)
return EINVAL; return EINVAL;
if ((uintptr_t)stackaddr & (__gransize - 1)) if (stacksize & (__pagesize - 1))
return EINVAL;
if (guardsize & (__pagesize - 1))
return EINVAL;
if ((uintptr_t)stackaddr & (__pagesize - 1))
return EINVAL; return EINVAL;
cosmo_stack_lock(); cosmo_stack_lock();
struct Dll *surplus = 0; struct Dll *surplus = 0;
@ -318,7 +477,7 @@ errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize,
cosmo_stack_rehabilitate(surplus); cosmo_stack_rehabilitate(surplus);
errno_t err = 0; errno_t err = 0;
if (stackaddr) if (stackaddr)
err = cosmo_stack_munmap(stackaddr, stacksize); err = cosmo_stack_munmap(stackaddr, stacksize, guardsize);
return err; return err;
} }

View file

@ -141,7 +141,8 @@ static textwindows dontinstrument uint32_t __proc_worker(void *arg) {
__bootstrap_tls(&tls, __builtin_frame_address(0)); __bootstrap_tls(&tls, __builtin_frame_address(0));
__maps_track( __maps_track(
(char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STACK_SIZE, (char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STACK_SIZE,
STACK_SIZE); STACK_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK);
for (;;) { for (;;) {
// assemble a group of processes to wait on. if more than 64 // assemble a group of processes to wait on. if more than 64

View file

@ -30,6 +30,8 @@
#include "libc/nt/thread.h" #include "libc/nt/thread.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/sysv/consts/clock.h" #include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/sicode.h" #include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/sig.h" #include "libc/sysv/consts/sig.h"
#include "libc/sysv/errfuns.h" #include "libc/sysv/errfuns.h"
@ -47,7 +49,8 @@ static textwindows dontinstrument uint32_t __itimer_worker(void *arg) {
__bootstrap_tls(&tls, sp); __bootstrap_tls(&tls, sp);
__maps_track( __maps_track(
(char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STACK_SIZE, (char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STACK_SIZE,
STACK_SIZE); STACK_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK);
for (;;) { for (;;) {
bool dosignal = false; bool dosignal = false;
struct timeval now, waituntil; struct timeval now, waituntil;

View file

@ -35,8 +35,8 @@
*/ */
void *NewCosmoStack(void) { void *NewCosmoStack(void) {
void *stackaddr; void *stackaddr;
unsigned stacksize = GetStackSize(); size_t stacksize = GetStackSize();
unsigned guardsize = GetGuardSize(); size_t guardsize = GetGuardSize();
errno_t err = cosmo_stack_alloc(&stacksize, &guardsize, &stackaddr); errno_t err = cosmo_stack_alloc(&stacksize, &guardsize, &stackaddr);
if (!err) if (!err)
return stackaddr; return stackaddr;

View file

@ -19,7 +19,7 @@
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
/** /**
* Returns size of protected region at bottom of thread stack. * Returns size of protected region beneath thread stack.
* *
* @param guardsize will be set to guard size in bytes * @param guardsize will be set to guard size in bytes
* @return 0 on success, or errno on error * @return 0 on success, or errno on error

View file

@ -20,15 +20,13 @@
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
/** /**
* Returns configuration for thread stack. * Returns configuration for custom thread stack.
* *
* This is a getter for a configuration attribute. By default, zeros are * If zero is returned to `*stackaddr` then a custom stack hasn't been
* returned. If pthread_attr_setstack() was called earlier, then this'll * specified by a previous call to pthread_attr_setstack().
* return those earlier supplied values.
* *
* @param stackaddr will be set to stack address in bytes * @param stackaddr will be set to stack address in bytes
* @return 0 on success, or errno on error * @return 0 on success, or errno on error
* @see pthread_attr_setstacksize()
*/ */
errno_t pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr, errno_t pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr,
size_t *stacksize) { size_t *stacksize) {

View file

@ -40,7 +40,7 @@
errno_t pthread_attr_init(pthread_attr_t *attr) { errno_t pthread_attr_init(pthread_attr_t *attr) {
*attr = (pthread_attr_t){ *attr = (pthread_attr_t){
.__stacksize = GetStackSize(), .__stacksize = GetStackSize(),
.__guardsize = __pagesize, .__guardsize = GetGuardSize(),
}; };
return 0; return 0;
} }

View file

@ -19,13 +19,7 @@
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
/** /**
* Sets size of protected region at bottom of thread stack. * Sets minimum size of protected region beneath thread stack.
*
* Cosmopolitan sets this value to `sysconf(_SC_PAGESIZE)` by default.
*
* You may set `guardsize` to disable the stack guard feature and gain a
* slight performance advantage by avoiding mprotect() calls. Note that
* it could make your code more prone to silent unreported corruption.
* *
* @param guardsize contains guard size in bytes, which is implicitly * @param guardsize contains guard size in bytes, which is implicitly
* rounded up to `sysconf(_SC_PAGESIZE)`, or zero to disable * rounded up to `sysconf(_SC_PAGESIZE)`, or zero to disable

View file

@ -16,64 +16,42 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/dce.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/limits.h" #include "libc/runtime/stack.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
/** /**
* Configures custom allocated stack for thread, e.g. * Configures custom stack for thread.
* *
* pthread_t id; * Normally you want to use pthread_attr_setstacksize() and
* pthread_attr_t attr; * pthread_attr_setguardsize() to configure how pthread_create()
* char *stk = NewCosmoStack(); * allocates stack memory for newly created threads. Cosmopolitan is
* pthread_attr_init(&attr); * very good at managing stack memory. However if you still want to
* pthread_attr_setstack(&attr, stk, GetStackSize()); * allocate stack memory on your own, POSIX defines this function.
* pthread_create(&id, &attr, func, 0);
* pthread_attr_destroy(&attr);
* pthread_join(id, 0);
* FreeCosmoStack(stk);
* *
* Your stack must have at least `PTHREAD_STACK_MIN` bytes, which * Your `stackaddr` points to the byte at the very bottom of your stack.
* Cosmpolitan Libc defines as `GetStackSize()`. It's a link-time * You are responsible for this memory. Your POSIX threads runtime will
* constant used by Actually Portable Executable that's 128 kb by * not free or unmap this allocation when the thread has terminated. If
* default. See libc/runtime/stack.h for docs on your stack limit * `stackaddr` is null then `stacksize` is ignored and default behavior
* since the APE ELF phdrs are the one true source of truth here. * is restored, i.e. pthread_create() will manage stack allocations.
* *
* Cosmpolitan Libc runtime magic (e.g. ftrace) and memory safety * Your `stackaddr` could be created by malloc(). On OpenBSD,
* (e.g. kprintf) assumes that stack sizes are two-powers and are * pthread_create() will augment your custom allocation so it's
* aligned to that two-power. Conformance isn't required since we * permissible by the kernel to use as a stack. You may also call
* say caveat emptor to those who don't maintain these invariants * Cosmopolitan APIs such as NewCosmoStack() and cosmo_stack_alloc().
* please consider using NewCosmoStack(), which is always perfect * Static memory can be used, but it won't reduce pthread footprint.
* or use `mmap(0, GetStackSize() << 1, ...)` for a bigger stack.
* *
* Unlike pthread_attr_setstacksize(), this function permits just
* about any parameters and will change the values and allocation
* as needed to conform to the mandatory requirements of the host
* operating system even if it doesn't meet the stricter needs of
* Cosmopolitan Libc userspace libraries. For example with malloc
* allocations, things like page size alignment, shall be handled
* automatically for compatibility with existing codebases.
*
* The same stack shouldn't be used for two separate threads. Use
* fresh stacks for each thread so that ASAN can be much happier.
*
* @param stackaddr is address of stack allocated by caller, and
* may be NULL in which case default behavior is restored
* @param stacksize is size of caller allocated stack
* @return 0 on success, or errno on error * @return 0 on success, or errno on error
* @raise EINVAL if parameters were unacceptable * @raise EINVAL if `stacksize` is less than `PTHREAD_STACK_MIN`
* @see pthread_attr_setstacksize() * @see pthread_attr_setstacksize()
*/ */
errno_t pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, errno_t pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr,
size_t stacksize) { size_t stacksize) {
if (!stackaddr) { if (!stackaddr) {
attr->__stackaddr = 0; attr->__stackaddr = 0;
attr->__stacksize = 0; attr->__stacksize = GetStackSize();
return 0; return 0;
} }
if (stacksize > INT_MAX)
return EINVAL;
if (stacksize < PTHREAD_STACK_MIN) if (stacksize < PTHREAD_STACK_MIN)
return EINVAL; return EINVAL;
attr->__stackaddr = stackaddr; attr->__stackaddr = stackaddr;

View file

@ -17,19 +17,28 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/limits.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
/** /**
* Defines minimum stack size for thread. * Specifies minimum stack size for thread.
*
* On Linux, if you're not using `cosmocc -mtiny`, and you're not using
* cosmo_dlopen(), and guard size is nonzero, then `MAP_GROWSDOWN` will
* be used to create your stack memory. This helps minimize virtual
* memory consumption. Please note this is only possible if `stacksize`
* is no larger than the current `RLIMIT_STACK`, otherwise the runtime
* will map your stack using plain old mmap().
*
* Non-custom stacks may be recycled by the cosmo runtime. You can
* control this behavior by calling cosmo_stack_setmaxstacks(). It's
* useful for both tuning performance and hardening security. See also
* pthread_attr_setguardsize() which is important for security too.
* *
* @param stacksize contains stack size in bytes * @param stacksize contains stack size in bytes
* @return 0 on success, or errno on error * @return 0 on success, or errno on error
* @raise EINVAL if `stacksize` is less than `PTHREAD_STACK_MIN` * @raise EINVAL if `stacksize` is less than `PTHREAD_STACK_MIN`
*/ */
errno_t pthread_attr_setstacksize(pthread_attr_t *a, size_t stacksize) { errno_t pthread_attr_setstacksize(pthread_attr_t *a, size_t stacksize) {
if (stacksize > INT_MAX)
return EINVAL;
if (stacksize < PTHREAD_STACK_MIN) if (stacksize < PTHREAD_STACK_MIN)
return EINVAL; return EINVAL;
a->__stacksize = stacksize; a->__stacksize = stacksize;

View file

@ -2,7 +2,7 @@
#define COSMOPOLITAN_LIBC_THREAD_THREAD_H_ #define COSMOPOLITAN_LIBC_THREAD_THREAD_H_
#define PTHREAD_KEYS_MAX 46 #define PTHREAD_KEYS_MAX 46
#define PTHREAD_STACK_MIN 65536 #define PTHREAD_STACK_MIN 32768
#define PTHREAD_USE_NSYNC 1 #define PTHREAD_USE_NSYNC 1
#define PTHREAD_DESTRUCTOR_ITERATIONS 4 #define PTHREAD_DESTRUCTOR_ITERATIONS 4
@ -129,8 +129,8 @@ typedef struct pthread_attr_s {
int __contentionscope; int __contentionscope;
int __sigaltstacksize; int __sigaltstacksize;
uint64_t __sigmask; uint64_t __sigmask;
unsigned __guardsize; size_t __guardsize;
unsigned __stacksize; size_t __stacksize;
void *__stackaddr; void *__stackaddr;
void *__sigaltstackaddr; void *__sigaltstackaddr;
} pthread_attr_t; } pthread_attr_t;

View file

@ -0,0 +1,75 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/cosmo.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/testlib/testlib.h"
// tells whether the byte at `addr` can be read
// the actual probing is delegated to testlib_pokememory()
bool readable(void *addr) {
  bool ok = testlib_pokememory(addr);
  return ok;
}
// returns true if page is reserved by linux memory manager
// it can be true for addresses that aren't listed in /proc/PID/maps
//
// the probe asks the kernel for a one-page anonymous mapping at the
// page-aligned address using MAP_FIXED_NOREPLACE. the page counts as
// occupied when that request fails, or when the kernel hands back a
// different address than the one requested. a successful probe mapping
// is always unmapped again before returning.
//
// on the failure path the caller's errno is restored, after checking
// the kernel reported the expected "already mapped" error, which is
// EINVAL on FreeBSD and EEXIST elsewhere.
bool occupied(void *addr) {
  int olde = errno;
  char *want = (char *)((uintptr_t)addr & -__pagesize);
  char *got =
      __sys_mmap(want, __pagesize, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0, 0);
  if (got == MAP_FAILED) {
    // the parentheses matter: `==` binds tighter than `?:`, so without
    // them this would evaluate `(errno == IsFreebsd()) ? EINVAL : EEXIST`
    // which is always nonzero and therefore could never fail
    unassert(errno == (IsFreebsd() ? EINVAL : EEXIST));
    errno = olde;
    return true;
  }
  sys_munmap(got, __pagesize);
  return got != want;
}
// requests a 65536 byte stack with a 4096 byte guard, then verifies the
// first and last byte of both the stack and the guard region beneath it
// are reserved, and that only the stack bytes are readable
TEST(stack, test) {
  if (IsWindows())
    return;

  // sizes are in/out parameters; use whatever the allocator hands back
  size_t guardsize = 4096;
  size_t stacksize = 65536;
  void *mem;
  unassert(!cosmo_stack_alloc(&stacksize, &guardsize, &mem));

  char *base = mem;
  char *stack_hi = base + stacksize - 1;  // top stack
  char *stack_lo = base;                  // bot stack
  char *guard_hi = base - 1;              // top guard
  char *guard_lo = base - guardsize;      // bot guard

  /* check memory reservation */
  unassert(occupied(stack_hi));
  unassert(occupied(stack_lo));
  unassert(occupied(guard_hi));
  unassert(occupied(guard_lo));

  /* check memory accessibility */
  unassert(readable(stack_hi));
  unassert(readable(stack_lo));
  unassert(!readable(guard_hi));
  unassert(!readable(guard_lo));

  unassert(!cosmo_stack_free(base, stacksize, guardsize));
}

View file

@ -19,6 +19,7 @@
#include "libc/atomic.h" #include "libc/atomic.h"
#include "libc/calls/calls.h" #include "libc/calls/calls.h"
#include "libc/calls/struct/sigaction.h" #include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/sigaltstack.h"
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/intrin/kprintf.h" #include "libc/intrin/kprintf.h"
@ -27,6 +28,7 @@
#include "libc/nexgen32e/nexgen32e.h" #include "libc/nexgen32e/nexgen32e.h"
#include "libc/runtime/internal.h" #include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/sig.h" #include "libc/sysv/consts/sig.h"
#include "libc/testlib/testlib.h" #include "libc/testlib/testlib.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"

View file

@ -70,6 +70,7 @@ void OnUsr1(int sig, siginfo_t *si, void *vctx) {
void SetUpOnce(void) { void SetUpOnce(void) {
cosmo_stack_setmaxstacks((_rand64() & 7) - 1); cosmo_stack_setmaxstacks((_rand64() & 7) - 1);
cosmo_stack_setmaxstacks(100);
} }
void SetUp(void) { void SetUp(void) {

View file

@ -129,6 +129,10 @@ int compare(const void *a, const void *b) {
int main() { int main() {
// this test probably exposes a bug in openbsd
if (IsOpenbsd())
return 0;
// TODO(jart): Why is this test flaky on Windows? // TODO(jart): Why is this test flaky on Windows?
if (IsWindows()) if (IsWindows())
return 0; return 0;