Reduce stack virtual memory consumption on Linux

This commit is contained in:
Justine Tunney 2024-12-25 19:43:43 -08:00
parent cc8a9eb93c
commit 36e5861b0c
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
31 changed files with 583 additions and 166 deletions

View file

@ -30,6 +30,8 @@
#include "libc/nt/thread.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/sig.h"
#include "libc/sysv/errfuns.h"
@ -47,7 +49,8 @@ static textwindows dontinstrument uint32_t __itimer_worker(void *arg) {
__bootstrap_tls(&tls, sp);
__maps_track(
(char *)(((uintptr_t)sp + __pagesize - 1) & -__pagesize) - STACK_SIZE,
STACK_SIZE);
STACK_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK);
for (;;) {
bool dosignal = false;
struct timeval now, waituntil;

View file

@ -35,8 +35,8 @@
*/
void *NewCosmoStack(void) {
void *stackaddr;
unsigned stacksize = GetStackSize();
unsigned guardsize = GetGuardSize();
size_t stacksize = GetStackSize();
size_t guardsize = GetGuardSize();
errno_t err = cosmo_stack_alloc(&stacksize, &guardsize, &stackaddr);
if (!err)
return stackaddr;

View file

@ -19,7 +19,7 @@
#include "libc/thread/thread.h"
/**
* Returns size of protected region at bottom of thread stack.
* Returns size of protected region beneath thread stack.
*
* @param guardsize will be set to guard size in bytes
* @return 0 on success, or errno on error

View file

@ -20,15 +20,13 @@
#include "libc/thread/thread.h"
/**
* Returns configuration for thread stack.
* Returns configuration for custom thread stack.
*
* This is a getter for a configuration attribute. By default, zeros are
* returned. If pthread_attr_setstack() was called earlier, then this'll
* return those earlier supplied values.
* If zero is returned to `*stackaddr` then a custom stack hasn't been
* specified by a previous call to pthread_attr_setstack().
*
* @param stackaddr will be set to address of stack
* @param stacksize will be set to size of stack in bytes
* @return 0 on success, or errno on error
* @see pthread_attr_setstacksize()
*/
errno_t pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr,
size_t *stacksize) {

View file

@ -40,7 +40,7 @@
errno_t pthread_attr_init(pthread_attr_t *attr) {
// Overwrite the whole attribute object with a compound literal; fields
// that aren't named below are zero-initialized.
*attr = (pthread_attr_t){
// default stack size is whatever GetStackSize() reports
.__stacksize = GetStackSize(),
// NOTE(review): `.__guardsize` is listed twice here, which looks like
// the before/after lines of this change shown together; if both were
// compiled, the later designator (GetGuardSize()) would take effect.
.__guardsize = __pagesize,
.__guardsize = GetGuardSize(),
};
// this function has no failure path
return 0;
}

View file

@ -19,13 +19,7 @@
#include "libc/thread/thread.h"
/**
* Sets size of protected region at bottom of thread stack.
*
* Cosmopolitan sets this value to `sysconf(_SC_PAGESIZE)` by default.
*
* You may set `guardsize` to zero to disable the stack guard feature
* and gain a slight performance advantage by avoiding mprotect() calls.
* Note that it could make your code more prone to silent unreported
* corruption.
* Sets minimum size of protected region beneath thread stack.
*
* @param guardsize contains guard size in bytes, which is implicitly
* rounded up to `sysconf(_SC_PAGESIZE)`, or zero to disable

View file

@ -16,64 +16,42 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/limits.h"
#include "libc/runtime/stack.h"
#include "libc/thread/thread.h"
/**
* Configures custom allocated stack for thread, e.g.
* Configures custom stack for thread.
*
* pthread_t id;
* pthread_attr_t attr;
* char *stk = NewCosmoStack();
* pthread_attr_init(&attr);
* pthread_attr_setstack(&attr, stk, GetStackSize());
* pthread_create(&id, &attr, func, 0);
* pthread_attr_destroy(&attr);
* pthread_join(id, 0);
* FreeCosmoStack(stk);
* Normally you want to use pthread_attr_setstacksize() and
* pthread_attr_setguardsize() to configure how pthread_create()
* allocates stack memory for newly created threads. Cosmopolitan is
* very good at managing stack memory. However if you still want to
* allocate stack memory on your own, POSIX defines this function.
*
* Your stack must have at least `PTHREAD_STACK_MIN` bytes, which
* Cosmopolitan Libc defines as `GetStackSize()`. It's a link-time
* constant used by Actually Portable Executable that's 128 kb by
* default. See libc/runtime/stack.h for docs on your stack limit
* since the APE ELF phdrs are the one true source of truth here.
* Your `stackaddr` points to the byte at the very bottom of your stack.
* You are responsible for this memory. Your POSIX threads runtime will
* not free or unmap this allocation when the thread has terminated. If
* `stackaddr` is null then `stacksize` is ignored and default behavior
* is restored, i.e. pthread_create() will manage stack allocations.
*
* Cosmopolitan Libc runtime magic (e.g. ftrace) and memory safety
* (e.g. kprintf) assume that stack sizes are two-powers and are
* aligned to that two-power. Conformance isn't required, since we
* say caveat emptor to those who don't maintain these invariants.
* Please consider using NewCosmoStack(), which is always perfect,
* or use `mmap(0, GetStackSize() << 1, ...)` for a bigger stack.
* Your `stackaddr` could be created by malloc(). On OpenBSD,
* pthread_create() will augment your custom allocation so it's
* permissible by the kernel to use as a stack. You may also call
* Cosmopolitan APIs such as NewCosmoStack() and cosmo_stack_alloc().
* Static memory can be used, but it won't reduce pthread footprint.
*
* Unlike pthread_attr_setstacksize(), this function permits just
* about any parameters and will change the values and allocation
* as needed to conform to the mandatory requirements of the host
* operating system even if it doesn't meet the stricter needs of
* Cosmopolitan Libc userspace libraries. For example with malloc
* allocations, things like page size alignment, shall be handled
* automatically for compatibility with existing codebases.
*
* The same stack shouldn't be used for two separate threads. Use
* fresh stacks for each thread so that ASAN can be much happier.
*
* @param stackaddr is address of stack allocated by caller, and
* may be NULL in which case default behavior is restored
* @param stacksize is size of caller allocated stack
* @return 0 on success, or errno on error
* @raise EINVAL if parameters were unacceptable
* @raise EINVAL if `stacksize` is less than `PTHREAD_STACK_MIN`
* @see pthread_attr_setstacksize()
*/
errno_t pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr,
size_t stacksize) {
if (!stackaddr) {
attr->__stackaddr = 0;
attr->__stacksize = 0;
attr->__stacksize = GetStackSize();
return 0;
}
if (stacksize > INT_MAX)
return EINVAL;
if (stacksize < PTHREAD_STACK_MIN)
return EINVAL;
attr->__stackaddr = stackaddr;

View file

@ -17,19 +17,28 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/limits.h"
#include "libc/thread/thread.h"
/**
* Defines minimum stack size for thread.
* Specifies minimum stack size for thread.
*
* On Linux, if you're not using `cosmocc -mtiny`, and you're not using
* cosmo_dlopen(), and guard size is nonzero, then `MAP_GROWSDOWN` will
* be used to create your stack memory. This helps minimize virtual
* memory consumption. Please note this is only possible if `stacksize`
* is no larger than the current `RLIMIT_STACK`, otherwise the runtime
* will map your stack using plain old mmap().
*
* Non-custom stacks may be recycled by the cosmo runtime. You can
* control this behavior by calling cosmo_stack_setmaxstacks(). It's
* useful for both tuning performance and hardening security. See also
* pthread_attr_setguardsize() which is important for security too.
*
* @param stacksize contains stack size in bytes
* @return 0 on success, or errno on error
* @raise EINVAL if `stacksize` is less than `PTHREAD_STACK_MIN`
*/
errno_t pthread_attr_setstacksize(pthread_attr_t *a, size_t stacksize) {
if (stacksize > INT_MAX)
return EINVAL;
if (stacksize < PTHREAD_STACK_MIN)
return EINVAL;
a->__stacksize = stacksize;

View file

@ -2,7 +2,7 @@
#define COSMOPOLITAN_LIBC_THREAD_THREAD_H_
#define PTHREAD_KEYS_MAX 46
#define PTHREAD_STACK_MIN 65536
#define PTHREAD_STACK_MIN 32768
#define PTHREAD_USE_NSYNC 1
#define PTHREAD_DESTRUCTOR_ITERATIONS 4
@ -129,8 +129,8 @@ typedef struct pthread_attr_s {
int __contentionscope;
int __sigaltstacksize;
uint64_t __sigmask;
unsigned __guardsize;
unsigned __stacksize;
size_t __guardsize;
size_t __stacksize;
void *__stackaddr;
void *__sigaltstackaddr;
} pthread_attr_t;