mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-07 03:38:31 +00:00
Reduce stack virtual memory consumption on Linux
This commit is contained in:
parent
cc8a9eb93c
commit
36e5861b0c
31 changed files with 583 additions and 166 deletions
|
@ -16,25 +16,29 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/describeflags.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/nt/enum/consolemodeflags.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
|
||||
#define MAP_GROWSDOWN_LINUX 0x00000100
|
||||
|
||||
const char *_DescribeMapFlags(char buf[64], int x) {
|
||||
const struct DescribeFlags kMapFlags[] = {
|
||||
{MAP_PRIVATE, "PRIVATE"}, //
|
||||
{MAP_ANONYMOUS, "ANONYMOUS"}, //
|
||||
{MAP_SHARED, "SHARED"}, //
|
||||
{MAP_FIXED, "FIXED"}, //
|
||||
{MAP_FIXED_NOREPLACE, "FIXED_NOREPLACE"}, //
|
||||
{MAP_HUGETLB, "HUGETLB"}, //
|
||||
{MAP_CONCEAL, "CONCEAL"}, //
|
||||
{MAP_LOCKED, "LOCKED"}, //
|
||||
{MAP_NORESERVE, "NORESERVE"}, //
|
||||
{MAP_NONBLOCK, "NONBLOCK"}, //
|
||||
{MAP_POPULATE, "POPULATE"}, //
|
||||
{MAP_PRIVATE, "PRIVATE"}, //
|
||||
{MAP_ANONYMOUS, "ANONYMOUS"}, //
|
||||
{MAP_SHARED, "SHARED"}, //
|
||||
{MAP_FIXED, "FIXED"}, //
|
||||
{MAP_FIXED_NOREPLACE, "FIXED_NOREPLACE"}, //
|
||||
{MAP_HUGETLB, "HUGETLB"}, //
|
||||
{MAP_CONCEAL, "CONCEAL"}, //
|
||||
{MAP_LOCKED, "LOCKED"}, //
|
||||
{MAP_NORESERVE, "NORESERVE"}, //
|
||||
{MAP_NONBLOCK, "NONBLOCK"}, //
|
||||
{MAP_POPULATE, "POPULATE"}, //
|
||||
{IsLinux() ? MAP_GROWSDOWN_LINUX : 0, "GROWSDOWN"}, //
|
||||
};
|
||||
return _DescribeFlags(buf, 64, kMapFlags, ARRAYLEN(kMapFlags), "MAP_", x);
|
||||
}
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#include "libc/sysv/consts/prot.h"
|
||||
|
||||
const char *_DescribeProtFlags(char buf[48], int x) {
|
||||
if (!x)
|
||||
return "PROT_NONE";
|
||||
const struct DescribeFlags kProtFlags[] = {
|
||||
{PROT_READ, "READ"}, //
|
||||
{PROT_WRITE, "WRITE"}, //
|
||||
|
|
|
@ -17,16 +17,13 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/calls/struct/rlimit.h"
|
||||
#include "libc/calls/struct/rlimit.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/getauxval.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/intrin/maps.h"
|
||||
#include "libc/intrin/rlimit.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/stdio/sysparam.h"
|
||||
#include "libc/sysv/consts/auxv.h"
|
||||
#include "libc/sysv/consts/rlim.h"
|
||||
#include "libc/sysv/consts/rlimit.h"
|
||||
|
||||
// Hack for guessing boundaries of _start()'s stack
|
||||
//
|
||||
|
@ -91,12 +88,9 @@ static uintptr_t __get_main_top(int pagesz) {
|
|||
}
|
||||
|
||||
static size_t __get_stack_size(int pagesz, uintptr_t start, uintptr_t top) {
|
||||
size_t size, max = 8 * 1024 * 1024;
|
||||
struct rlimit rlim = {RLIM_INFINITY};
|
||||
sys_getrlimit(RLIMIT_STACK, &rlim);
|
||||
if ((size = rlim.rlim_cur) > max)
|
||||
size = max;
|
||||
return MAX(ROUNDUP(size, pagesz), ROUNDUP(top - start, pagesz));
|
||||
size_t stacksz = __rlimit_stack_get().rlim_cur;
|
||||
stacksz = MIN(stacksz, 1024ul * 1024 * 1024 * 1024);
|
||||
return MAX(ROUNDDOWN(stacksz, pagesz), ROUNDUP(top - start, pagesz));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
38
libc/intrin/isqemu.c
Normal file
38
libc/intrin/isqemu.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2024 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "ape/sections.internal.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/syscall-sysv.internal.h"
|
||||
#include "libc/errno.h"
|
||||
|
||||
/**
|
||||
* Returns true if process is running under qemu-x86_64 or qemu-aarch64.
|
||||
*/
|
||||
int IsQemuUser(void) {
|
||||
static char rplus1;
|
||||
if (!rplus1) {
|
||||
// qemu doesn't validate the advice argument
|
||||
// we could also check if __getcwd(0, 0) raises efault
|
||||
int e = errno;
|
||||
int r = !sys_madvise(__executable_start, 16384, 127);
|
||||
errno = e;
|
||||
rplus1 = r + 1;
|
||||
}
|
||||
return rplus1 - 1;
|
||||
}
|
50
libc/intrin/lockless.h
Normal file
50
libc/intrin/lockless.h
Normal file
|
@ -0,0 +1,50 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_LOCKLESS_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_LOCKLESS_H_
|
||||
#include "libc/atomic.h"
|
||||
#include "libc/intrin/atomic.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
// lockless memory transactions
|
||||
//
|
||||
// - one writer
|
||||
// - many readers
|
||||
// - generation is monotonic
|
||||
// - even numbers mean memory is ready
|
||||
// - odd numbers mean memory is actively being changed
|
||||
// - always use acquire semantics inside your read transaction
|
||||
//
|
||||
// let's say you want to be able to atomically read and write to 128-bit
|
||||
// values, but you've only got a 64-bit system. if you expect it'll be
|
||||
// frequently written, then you should use a mutex. but if you expect it
|
||||
// to be frequently read and rarely written, then it's possible to do it
|
||||
// without a mutex; in fact you don't even need the x86 lock instruction
|
||||
// prefix; all that is required is a series of carefully ordered mov ops
|
||||
// which are designed to exploit the strong ordering of the architecture
|
||||
|
||||
// opens a write transaction by bumping the generation by one
//
// returns the generation observed before the bump, which the caller
// must hand back to lockless_write_end()
static inline unsigned lockless_write_begin(atomic_uint* genptr) {
  unsigned start = atomic_load_explicit(genptr, memory_order_acquire);
  unsigned busy = start + 1;
  atomic_store_explicit(genptr, busy, memory_order_release);
  return start;
}
|
||||
|
||||
// closes a write transaction by publishing generation `gen + 2`, where
// `gen` is the value lockless_write_begin() returned
static inline void lockless_write_end(atomic_uint* genptr, unsigned gen) {
  unsigned done = gen + 2;
  atomic_store_explicit(genptr, done, memory_order_release);
}
|
||||
|
||||
// opens a read transaction by sampling the current generation, which
// the caller later passes to lockless_read_end() for validation
static inline unsigned lockless_read_begin(atomic_uint* genptr) {
  unsigned snapshot = atomic_load_explicit(genptr, memory_order_acquire);
  return snapshot;
}
|
||||
|
||||
// closes a read transaction
//
// returns true when the generation in `*want` was even and is still
// the current generation, meaning what was read is consistent. returns
// false otherwise, after storing the current generation in `*want` so
// the caller can redo its reads and call this function again
static inline bool lockless_read_end(atomic_uint* genptr, unsigned* want) {
  unsigned before = *want;
  unsigned after = atomic_load_explicit(genptr, memory_order_acquire);
  bool was_stable = (before & 1) == 0;  // odd means a write was in flight
  bool unchanged = before == after;     // no writer finished meanwhile
  if (was_stable && unchanged)
    return true;
  *want = after;
  return false;
}
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_LOCKLESS_H_ */
|
|
@ -57,7 +57,8 @@ void *__maps_randaddr(void);
|
|||
void __maps_add(struct Map *);
|
||||
void __maps_free(struct Map *);
|
||||
void __maps_insert(struct Map *);
|
||||
bool __maps_track(char *, size_t);
|
||||
int __maps_untrack(char *, size_t);
|
||||
bool __maps_track(char *, size_t, int, int);
|
||||
struct Map *__maps_alloc(void);
|
||||
struct Map *__maps_floor(const char *);
|
||||
void __maps_stack(char *, int, int, size_t, int, intptr_t);
|
||||
|
@ -78,6 +79,13 @@ static inline struct Map *__maps_next(struct Map *map) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline struct Map *__maps_prev(struct Map *map) {
|
||||
struct Tree *node;
|
||||
if ((node = tree_prev(&map->tree)))
|
||||
return MAP_TREE_CONTAINER(node);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct Map *__maps_first(void) {
|
||||
struct Tree *node;
|
||||
if ((node = tree_first(__maps.maps)))
|
||||
|
|
|
@ -85,7 +85,8 @@ privileged optimizespeed struct Map *__maps_floor(const char *addr) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool __maps_overlaps(const char *addr, size_t size, int pagesz) {
|
||||
static bool __maps_overlaps(const char *addr, size_t size) {
|
||||
int pagesz = __pagesize;
|
||||
struct Map *map, *floor = __maps_floor(addr);
|
||||
for (map = floor; map && map->addr <= addr + size; map = __maps_next(map))
|
||||
if (MAX(addr, map->addr) <
|
||||
|
@ -305,27 +306,39 @@ void __maps_insert(struct Map *map) {
|
|||
}
|
||||
|
||||
static void __maps_track_insert(struct Map *map, char *addr, size_t size,
|
||||
uintptr_t map_handle) {
|
||||
uintptr_t map_handle, int prot, int flags) {
|
||||
map->addr = addr;
|
||||
map->size = size;
|
||||
map->prot = PROT_READ | PROT_WRITE;
|
||||
map->flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK;
|
||||
map->prot = prot;
|
||||
map->flags = flags;
|
||||
map->hand = map_handle;
|
||||
__maps_lock();
|
||||
ASSERT(!__maps_overlaps(addr, size));
|
||||
__maps_insert(map);
|
||||
__maps_unlock();
|
||||
}
|
||||
|
||||
bool __maps_track(char *addr, size_t size) {
|
||||
// adds interval to rbtree (no sys_mmap)
|
||||
bool __maps_track(char *addr, size_t size, int prot, int flags) {
|
||||
struct Map *map;
|
||||
do {
|
||||
if (!(map = __maps_alloc()))
|
||||
return false;
|
||||
} while (map == MAPS_RETRY);
|
||||
__maps_track_insert(map, addr, size, -1);
|
||||
__maps_track_insert(map, addr, size, -1, prot, flags);
|
||||
return true;
|
||||
}
|
||||
|
||||
// removes interval from rbtree (no sys_munmap)
|
||||
int __maps_untrack(char *addr, size_t size) {
|
||||
struct Map *deleted = 0;
|
||||
__maps_lock();
|
||||
int rc = __muntrack(addr, size, __pagesize, &deleted);
|
||||
__maps_unlock();
|
||||
__maps_free_all(deleted);
|
||||
return rc;
|
||||
}
|
||||
|
||||
struct Map *__maps_alloc(void) {
|
||||
struct Map *map;
|
||||
uintptr_t tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed);
|
||||
|
@ -342,7 +355,9 @@ struct Map *__maps_alloc(void) {
|
|||
if (sys.addr == MAP_FAILED)
|
||||
return 0;
|
||||
map = sys.addr;
|
||||
__maps_track_insert(map, sys.addr, gransz, sys.maphandle);
|
||||
__maps_track_insert(map, sys.addr, gransz, sys.maphandle,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK);
|
||||
for (int i = 1; i < gransz / sizeof(struct Map); ++i)
|
||||
__maps_free(map + i);
|
||||
return MAPS_RETRY;
|
||||
|
@ -370,7 +385,7 @@ static int __munmap(char *addr, size_t size) {
|
|||
size_t pgup_size = (size + pagesz - 1) & -pagesz;
|
||||
size_t grup_size = (size + gransz - 1) & -gransz;
|
||||
if (grup_size > pgup_size)
|
||||
if (__maps_overlaps(addr + pgup_size, grup_size - pgup_size, pagesz)) {
|
||||
if (__maps_overlaps(addr + pgup_size, grup_size - pgup_size)) {
|
||||
__maps_unlock();
|
||||
return einval();
|
||||
}
|
||||
|
@ -420,7 +435,7 @@ static void *__maps_pickaddr(size_t size) {
|
|||
__maps.pick = 0;
|
||||
if (!addr)
|
||||
addr = __maps_randaddr();
|
||||
if (!__maps_overlaps(addr, size, __pagesize)) {
|
||||
if (!__maps_overlaps(addr, size)) {
|
||||
__maps.pick = addr + ((size + __gransize - 1) & -__gransize);
|
||||
__maps_unlock();
|
||||
return addr;
|
||||
|
@ -455,7 +470,7 @@ static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
|
|||
sysflags |= MAP_FIXED_NOREPLACE_linux;
|
||||
} else if (IsFreebsd() || IsNetbsd()) {
|
||||
sysflags |= MAP_FIXED;
|
||||
if (__maps_overlaps(addr, size, pagesz)) {
|
||||
if (__maps_overlaps(addr, size)) {
|
||||
__maps_free(map);
|
||||
return (void *)eexist();
|
||||
}
|
||||
|
@ -508,11 +523,8 @@ TryAgain:
|
|||
}
|
||||
|
||||
// untrack mapping we blew away
|
||||
if (!IsWindows() && should_untrack) {
|
||||
struct Map *deleted = 0;
|
||||
__muntrack(res.addr, size, pagesz, &deleted);
|
||||
__maps_free_all(deleted);
|
||||
}
|
||||
if (!IsWindows() && should_untrack)
|
||||
__maps_untrack(res.addr, size);
|
||||
|
||||
// track map object
|
||||
map->addr = res.addr;
|
||||
|
@ -599,8 +611,8 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
|
|||
size_t pgup_old_size = (old_size + pagesz - 1) & -pagesz;
|
||||
size_t grup_old_size = (old_size + gransz - 1) & -gransz;
|
||||
if (grup_old_size > pgup_old_size)
|
||||
if (__maps_overlaps(old_addr + pgup_old_size, grup_old_size - pgup_old_size,
|
||||
pagesz))
|
||||
if (__maps_overlaps(old_addr + pgup_old_size,
|
||||
grup_old_size - pgup_old_size))
|
||||
return (void *)einval();
|
||||
old_size = pgup_old_size;
|
||||
|
||||
|
@ -611,7 +623,7 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
|
|||
size_t grup_new_size = (new_size + gransz - 1) & -gransz;
|
||||
if (grup_new_size > pgup_new_size)
|
||||
if (__maps_overlaps(new_addr + pgup_new_size,
|
||||
grup_new_size - pgup_new_size, pagesz))
|
||||
grup_new_size - pgup_new_size))
|
||||
return (void *)einval();
|
||||
}
|
||||
|
||||
|
|
10
libc/intrin/rlimit.h
Normal file
10
libc/intrin/rlimit.h
Normal file
|
@ -0,0 +1,10 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_RLIMIT_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_RLIMIT_H_
|
||||
#include "libc/calls/struct/rlimit.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void __rlimit_stack_set(struct rlimit);
|
||||
struct rlimit __rlimit_stack_get(void);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_RLIMIT_H_ */
|
76
libc/intrin/rlimitstack.c
Normal file
76
libc/intrin/rlimitstack.c
Normal file
|
@ -0,0 +1,76 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2024 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/atomic.h"
|
||||
#include "libc/calls/struct/rlimit.h"
|
||||
#include "libc/calls/struct/rlimit.internal.h"
|
||||
#include "libc/cosmo.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/cxaatexit.h"
|
||||
#include "libc/intrin/lockless.h"
|
||||
#include "libc/intrin/rlimit.h"
|
||||
#include "libc/runtime/stack.h"
|
||||
#include "libc/sysv/consts/rlim.h"
|
||||
#include "libc/sysv/consts/rlimit.h"
|
||||
|
||||
// holds the RLIMIT_STACK limits in a form that can be read without
// taking a lock
struct atomic_rlimit {
  atomic_ulong cur;  // soft limit (rlim_cur)
  atomic_ulong max;  // hard limit (rlim_max)
  atomic_uint once;  // cosmo_once() guard for __rlimit_stack_init()
  atomic_uint gen;   // generation counter used with the lockless_*() helpers
};
|
||||
|
||||
static struct atomic_rlimit __rlimit_stack;
|
||||
|
||||
static void __rlimit_stack_init(void) {
|
||||
struct rlimit rlim;
|
||||
if (IsWindows()) {
|
||||
rlim.rlim_cur = GetStaticStackSize();
|
||||
rlim.rlim_max = -1; // RLIM_INFINITY in consts.sh
|
||||
} else {
|
||||
sys_getrlimit(RLIMIT_STACK, &rlim);
|
||||
}
|
||||
atomic_init(&__rlimit_stack.cur, rlim.rlim_cur);
|
||||
atomic_init(&__rlimit_stack.max, rlim.rlim_max);
|
||||
}
|
||||
|
||||
struct rlimit __rlimit_stack_get(void) {
|
||||
unsigned gen;
|
||||
unsigned long cur, max;
|
||||
cosmo_once(&__rlimit_stack.once, __rlimit_stack_init);
|
||||
gen = lockless_read_begin(&__rlimit_stack.gen);
|
||||
do {
|
||||
cur = atomic_load_explicit(&__rlimit_stack.cur, memory_order_acquire);
|
||||
max = atomic_load_explicit(&__rlimit_stack.max, memory_order_acquire);
|
||||
} while (!lockless_read_end(&__rlimit_stack.gen, &gen));
|
||||
return (struct rlimit){cur, max};
|
||||
}
|
||||
|
||||
void __rlimit_stack_set(struct rlimit rlim) {
|
||||
unsigned gen;
|
||||
unsigned long cur, max;
|
||||
cosmo_once(&__rlimit_stack.once, __rlimit_stack_init);
|
||||
__cxa_lock();
|
||||
cur = rlim.rlim_cur;
|
||||
max = rlim.rlim_max;
|
||||
gen = lockless_write_begin(&__rlimit_stack.gen);
|
||||
atomic_store_explicit(&__rlimit_stack.cur, cur, memory_order_release);
|
||||
atomic_store_explicit(&__rlimit_stack.max, max, memory_order_release);
|
||||
lockless_write_end(&__rlimit_stack.gen, gen);
|
||||
__cxa_unlock();
|
||||
}
|
|
@ -53,6 +53,8 @@
|
|||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/symbols.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/sysv/consts/sa.h"
|
||||
#include "libc/sysv/consts/sicode.h"
|
||||
#include "libc/sysv/consts/ss.h"
|
||||
|
@ -680,7 +682,8 @@ textwindows dontinstrument static uint32_t __sig_worker(void *arg) {
|
|||
__bootstrap_tls(&tls, __builtin_frame_address(0));
|
||||
char *sp = __builtin_frame_address(0);
|
||||
__maps_track((char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STKSZ,
|
||||
STKSZ);
|
||||
STKSZ, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK);
|
||||
for (;;) {
|
||||
_pthread_mutex_lock(&__sig_worker_lock);
|
||||
|
||||
|
|
|
@ -23,9 +23,16 @@
|
|||
#include "libc/calls/syscall-sysv.internal.h"
|
||||
#include "libc/cosmo.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/dlopen/dlfcn.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/describeflags.h"
|
||||
#include "libc/intrin/dll.h"
|
||||
#include "libc/intrin/maps.h"
|
||||
#include "libc/intrin/rlimit.h"
|
||||
#include "libc/intrin/strace.h"
|
||||
#include "libc/intrin/weaken.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/sock/internal.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/thread/posixthread.internal.h"
|
||||
|
@ -35,6 +42,11 @@
|
|||
* @fileoverview cosmo stack memory manager
|
||||
*/
|
||||
|
||||
#define MAP_GROWSDOWN_LINUX 0x00000100
|
||||
#define MAP_ANONYMOUS_LINUX 0x00000020
|
||||
#define MAP_NOREPLACE_LINUX 0x08000000
|
||||
#define MAP_NORESERVE_LINUX 0x00004000
|
||||
|
||||
#define MAP_ANON_OPENBSD 0x1000
|
||||
#define MAP_STACK_OPENBSD 0x4000
|
||||
|
||||
|
@ -43,8 +55,8 @@
|
|||
struct CosmoStack {
|
||||
struct Dll elem;
|
||||
void *stackaddr;
|
||||
unsigned stacksize;
|
||||
unsigned guardsize;
|
||||
size_t stacksize;
|
||||
size_t guardsize;
|
||||
};
|
||||
|
||||
struct CosmoStacks {
|
||||
|
@ -79,10 +91,133 @@ void cosmo_stack_wipe(void) {
|
|||
_pthread_mutex_wipe_np(&cosmo_stacks.lock);
|
||||
}
|
||||
|
||||
static errno_t cosmo_stack_munmap(void *addr, size_t size) {
|
||||
// map_growsdown will not grow more than rlimit_stack
|
||||
static size_t cosmo_stack_maxgrow(void) {
|
||||
return __rlimit_stack_get().rlim_cur & -__pagesize;
|
||||
}
|
||||
|
||||
// allocates private anonymous fixed noreplace memory on linux
|
||||
static void *flixmap(void *addr, size_t size, int prot, int flags) {
|
||||
flags |= MAP_PRIVATE | MAP_ANONYMOUS_LINUX | MAP_NOREPLACE_LINUX;
|
||||
void *res = __sys_mmap(addr, size, prot, flags, -1, 0, 0);
|
||||
if (res != MAP_FAILED) {
|
||||
if (res != addr) {
|
||||
sys_munmap(addr, size);
|
||||
errno = EEXIST; // polyfill linux 4.17+ behavior
|
||||
res = 0;
|
||||
}
|
||||
} else {
|
||||
res = 0;
|
||||
}
|
||||
STRACE("mmap(%p, %'zu, %s, %s) → %p% m", addr, size, DescribeProtFlags(prot),
|
||||
DescribeMapFlags(flags), res);
|
||||
return res;
|
||||
}
|
||||
|
||||
// maps stack on linux
//
// searches the tracked mappings from high addresses downward for a hole
// of `guardsize + stacksize` bytes, records the guard and stack regions
// in the rbtree, then asks the kernel for the guard region and for only
// the top-most stack page (as a map_growsdown mapping).
//
// returns the address of the bottom of the stack, i.e. just above the
// guard region, or null on failure with errno set
static void *slackmap(size_t stacksize, size_t guardsize) {
  int olde = errno;
  struct Map *prev, *map;
  char *max = (char *)0x7fffffffffff;  // search starts at or below here
  size_t need = guardsize + stacksize;
  __maps_lock();
  for (;;) {

    // look for empty space beneath higher mappings
    char *region = 0;
    for (map = __maps_floor(max); map; map = prev) {
      // lowest usable address is the end of the previous mapping, or
      // one page above zero when there's no previous mapping
      char *min = (char *)(intptr_t)__pagesize;
      if ((prev = __maps_prev(map)))
        min = prev->addr + prev->size;
      if (map->addr - min >= need) {
        region = map->addr - need;
        max = region - 1;  // a later retry resumes beneath this region
        break;
      }
    }
    if (!region)
      break;

    // track intended memory in rbtree
    if (!__maps_track(region, guardsize, PROT_NONE,
                      MAP_PRIVATE | MAP_ANONYMOUS_LINUX))
      break;
    if (!__maps_track(region + guardsize, stacksize, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS_LINUX)) {
      __maps_untrack(region, need);
      break;
    }
    __maps_unlock();

    // ask kernel to create guard region
    // taking special care to not clobber untracked mappings
    //
    // it's important that this call happen first, since it limits how
    // much memory map_growsdown will secretly consume. if there's
    // nothing beneath a map_growsdown mapping, then the kernel reserves
    // (and this isn't listed in /proc/PID/maps so don't bother looking)
    // `rlimit_stack.rlim_cur & -__pagesize` bytes of memory including
    // this top-most page, and another 1mb of guard pages beneath that.
    // but by mapping our guard pages manually, we ensure the guard
    // region and the stack itself will be exactly as big as we want.
    //
    // you'd think we could mmap(0, pagesz, growsdown) to let the kernel
    // pick an address and then we could just upscale the user's stack
    // size request to whatever rlimit_stack is if it's bigger. but the
    // linux kernel will actually choose addresses between existing maps
    // where the hole is smaller than rlimit_stack.
    //
    // to use map_growsdown, we must use map_fixed. normally when we use
    // map_fixed, we reserve an entire kernel-assigned region beforehand
    // to ensure there isn't any overlap with existing mappings. however
    // since growsdown stops growing when it encounters another mapping,
    // you can't map it on top of a reservation mapping. so we must take
    // a leap of faith there aren't any mystery mappings twixt the guard
    // region and growsdown page below.
    char *guard_region =
        flixmap(region, guardsize, PROT_NONE, MAP_NORESERVE_LINUX);
    if (!guard_region) {
    RecoverFromMmapFailure:
      // also reached by goto when mapping the top stack page fails
      if (errno != EEXIST) {
        // mmap() probably raised enomem due to rlimit_as etc.
        __maps_untrack(region, need);
        return 0;
      } else {
        // we've encountered a mystery mapping. it's hard to imagine
        // this happening, since we don't use map_growsdown when
        // cosmo_dlopen() is linked in the binary. in that case, the
        // tracker we created covers at least some of the rogue map,
        // therefore this issue should fix itself if we keep going
        errno = olde;
        __maps_lock();  // the search loop expects the lock to be held
        ++max;
        continue;
      }
    }

    // ask kernel to create stack pages
    // taking special care to not clobber untracked mappings
    //
    // only the top-most page of the stack region is mapped here
    char *top_page = flixmap(region + need - __pagesize, __pagesize,
                             PROT_READ | PROT_WRITE, MAP_GROWSDOWN_LINUX);
    if (!top_page) {
      // give back the guard region mapped above before recovering
      sys_munmap(region, guardsize);
      goto RecoverFromMmapFailure;
    }

    // return address to bottom of stack
    return region + guardsize;
  }
  // no hole was found, or the region couldn't be tracked
  __maps_unlock();
  errno = ENOMEM;
  return 0;
}
|
||||
|
||||
static errno_t cosmo_stack_munmap(char *stackaddr, size_t stacksize,
|
||||
size_t guardsize) {
|
||||
errno_t r = 0;
|
||||
errno_t e = errno;
|
||||
if (!munmap(addr, size)) {
|
||||
if (!munmap(stackaddr - guardsize, //
|
||||
guardsize + stacksize)) {
|
||||
r = errno;
|
||||
errno = e;
|
||||
}
|
||||
|
@ -119,7 +254,8 @@ static void cosmo_stack_rehabilitate(struct Dll *stacks) {
|
|||
struct Dll *e;
|
||||
for (e = dll_first(stacks); e; e = dll_next(stacks, e))
|
||||
cosmo_stack_munmap(THREADSTACK_CONTAINER(e)->stackaddr,
|
||||
THREADSTACK_CONTAINER(e)->stacksize);
|
||||
THREADSTACK_CONTAINER(e)->stacksize,
|
||||
THREADSTACK_CONTAINER(e)->guardsize);
|
||||
cosmo_stack_lock();
|
||||
dll_make_first(&cosmo_stacks.objects, stacks);
|
||||
cosmo_stack_unlock();
|
||||
|
@ -193,39 +329,41 @@ void cosmo_stack_setmaxstacks(int maxstacks) {
|
|||
* abstract all the gory details of gaining authorized memory, and
|
||||
* additionally implements caching for lightning fast performance.
|
||||
*
|
||||
* The stack size must be nonzero. It is rounded up to the granularity
|
||||
* of the underlying system allocator, which is normally the page size.
|
||||
* Your parameter will be updated with the selected value upon success.
|
||||
* The stack size must be nonzero. It specifies the minimum amount of
|
||||
* stack space that will be available for use. The provided value is
|
||||
* rounded up to the system page size. It may be increased further for
|
||||
* various reasons. Your stack size parameter will be updated with the
|
||||
* chosen value upon success.
|
||||
*
|
||||
* The guard size specifies how much memory should be protected at the
|
||||
* bottom of your stack. This is helpful for ensuring stack overflows
|
||||
* will result in a segmentation fault, rather than corrupting memory
|
||||
* silently. This may be set to zero, in which case no guard pages will
|
||||
* be protected. This value is rounded up to the system page size. The
|
||||
* corrected value will be returned upon success. Your guard size needs
|
||||
* to be small enough to leave room for at least one memory page in your
|
||||
* stack size i.e. `guardsize + pagesize <= stacksize` must be the case.
|
||||
* Otherwise this function will return an `EINVAL` error.
|
||||
* The guard size specifies the minimum amount of memory that should be
|
||||
* protected beneath your stack. This helps ensure stack overflows cause
|
||||
* a segfault rather than corrupting memory silently. This may be set to
|
||||
* zero in which case no guard pages will be made. This value is rounded
|
||||
* up to the system page size. The corrected value will be returned upon
|
||||
* success. Your guard size needs to be small enough to leave room for
|
||||
* at least one memory page in your stack size i.e. `guardsize +
|
||||
* pagesize <= stacksize` must be the case. Otherwise this function will
|
||||
* return an `EINVAL` error.
|
||||
*
|
||||
* When you're done using your stack, pass it to cosmo_stack_free() so
|
||||
* it can be recycled. Stacks are only recycled when the `stacksize` and
|
||||
* `guardsize` parameters are an exact match after correction. Otherwise
|
||||
* they'll likely be freed eventually, in a least-recently used fashion,
|
||||
* based upon the configurable cosmo_stack_setmaxstacks() setting.
|
||||
* `guardsize` parameters match the constraints described above. Stacks
|
||||
* that don't end up getting reused will be freed eventually, in a least
|
||||
* recently used way based upon your cosmo_stack_setmaxstacks() setting.
|
||||
*
|
||||
* This function returns 0 on success, or an errno on error. See the
|
||||
* documentation of mmap() for a list of possible errors that may occur.
|
||||
*/
|
||||
errno_t cosmo_stack_alloc(unsigned *inout_stacksize, //
|
||||
unsigned *inout_guardsize, //
|
||||
errno_t cosmo_stack_alloc(size_t *inout_stacksize, //
|
||||
size_t *inout_guardsize, //
|
||||
void **out_addr) {
|
||||
|
||||
// validate arguments
|
||||
unsigned stacksize = *inout_stacksize;
|
||||
unsigned guardsize = *inout_guardsize;
|
||||
stacksize = (stacksize + __gransize - 1) & -__gransize;
|
||||
size_t stacksize = *inout_stacksize;
|
||||
size_t guardsize = *inout_guardsize;
|
||||
stacksize = (stacksize + __pagesize - 1) & -__pagesize;
|
||||
guardsize = (guardsize + __pagesize - 1) & -__pagesize;
|
||||
if (guardsize + __pagesize > stacksize)
|
||||
if (!stacksize)
|
||||
return EINVAL;
|
||||
|
||||
// recycle stack
|
||||
|
@ -236,8 +374,10 @@ errno_t cosmo_stack_alloc(unsigned *inout_stacksize, //
|
|||
struct CosmoStack *ts = THREADSTACK_CONTAINER(e);
|
||||
if (ts->stacksize == stacksize && //
|
||||
ts->guardsize == guardsize) {
|
||||
dll_remove(&cosmo_stacks.stacks, e);
|
||||
stackaddr = ts->stackaddr;
|
||||
stacksize = ts->stacksize;
|
||||
guardsize = ts->guardsize;
|
||||
dll_remove(&cosmo_stacks.stacks, e);
|
||||
dll_make_first(&cosmo_stacks.objects, e);
|
||||
--cosmo_stacks.count;
|
||||
break;
|
||||
|
@ -247,20 +387,37 @@ errno_t cosmo_stack_alloc(unsigned *inout_stacksize, //
|
|||
|
||||
// create stack
|
||||
if (!stackaddr) {
|
||||
errno_t e = errno;
|
||||
stackaddr = mmap(0, stacksize, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (stackaddr == MAP_FAILED) {
|
||||
errno_t err = errno;
|
||||
errno = e;
|
||||
return err;
|
||||
errno_t olde = errno;
|
||||
if (!IsTiny() && IsLinux() && guardsize && !_weaken(cosmo_dlopen) &&
|
||||
stacksize <= cosmo_stack_maxgrow() && !IsQemuUser()) {
|
||||
// this special linux-only stack allocator significantly reduces
|
||||
// the consumption of virtual memory.
|
||||
if (!(stackaddr = slackmap(stacksize, guardsize))) {
|
||||
errno_t err = errno;
|
||||
errno = olde;
|
||||
return err;
|
||||
}
|
||||
} else {
|
||||
char *map = mmap(0, guardsize + stacksize, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (map == MAP_FAILED) {
|
||||
errno_t err = errno;
|
||||
errno = olde;
|
||||
return err;
|
||||
}
|
||||
stackaddr = map + guardsize;
|
||||
if (IsOpenbsd())
|
||||
if (!TellOpenbsdThisIsStackMemory(stackaddr, stacksize))
|
||||
notpossible;
|
||||
if (guardsize) {
|
||||
if (mprotect(map, guardsize, PROT_NONE | PROT_GUARD)) {
|
||||
errno_t err = errno;
|
||||
munmap(map, guardsize + stacksize);
|
||||
errno = olde;
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (IsOpenbsd())
|
||||
if (!TellOpenbsdThisIsStackMemory(stackaddr, stacksize))
|
||||
notpossible;
|
||||
if (guardsize)
|
||||
if (mprotect(stackaddr, guardsize, PROT_NONE | PROT_GUARD))
|
||||
notpossible;
|
||||
}
|
||||
|
||||
// return stack
|
||||
|
@ -277,20 +434,22 @@ static void cosmo_stack_setup(void) {
|
|||
/**
|
||||
* Frees stack memory.
|
||||
*
|
||||
* While not strictly required, it's assumed these three values would be
|
||||
* those returned by an earlier call to cosmo_stack_alloc().
|
||||
* While not strictly required, it's assumed the three parameters are
|
||||
* those returned by an earlier call to cosmo_stack_alloc(). If they
|
||||
* aren't page aligned and rounded, this function will return EINVAL.
|
||||
*
|
||||
* This function returns 0 on success, or an errno on error. The `errno`
|
||||
* variable is never clobbered. You can only dependably count on this to
|
||||
* return an error on failure when you say `cosmo_stack_setmaxstacks(0)`
|
||||
*/
|
||||
errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize,
|
||||
unsigned guardsize) {
|
||||
stacksize = (stacksize + __gransize - 1) & -__gransize;
|
||||
guardsize = (guardsize + __pagesize - 1) & -__pagesize;
|
||||
if (guardsize + __pagesize > stacksize)
|
||||
errno_t cosmo_stack_free(void *stackaddr, size_t stacksize, size_t guardsize) {
|
||||
if (!stacksize)
|
||||
return EINVAL;
|
||||
if ((uintptr_t)stackaddr & (__gransize - 1))
|
||||
if (stacksize & (__pagesize - 1))
|
||||
return EINVAL;
|
||||
if (guardsize & (__pagesize - 1))
|
||||
return EINVAL;
|
||||
if ((uintptr_t)stackaddr & (__pagesize - 1))
|
||||
return EINVAL;
|
||||
cosmo_stack_lock();
|
||||
struct Dll *surplus = 0;
|
||||
|
@ -318,7 +477,7 @@ errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize,
|
|||
cosmo_stack_rehabilitate(surplus);
|
||||
errno_t err = 0;
|
||||
if (stackaddr)
|
||||
err = cosmo_stack_munmap(stackaddr, stacksize);
|
||||
err = cosmo_stack_munmap(stackaddr, stacksize, guardsize);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue