Reduce mandatory stack rss by 256kb

This commit is contained in:
Justine Tunney 2023-09-07 04:30:44 -07:00
parent 0e087143fd
commit b592716d1c
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
15 changed files with 98 additions and 125 deletions

View file

@ -280,6 +280,8 @@ SECTIONS {
ape_stack_vaddr = DEFINED(ape_stack_vaddr) ? ape_stack_vaddr : 0x700000000000; ape_stack_vaddr = DEFINED(ape_stack_vaddr) ? ape_stack_vaddr : 0x700000000000;
ape_stack_memsz = DEFINED(ape_stack_memsz) ? ape_stack_memsz : 8 * 1024 * 1024; ape_stack_memsz = DEFINED(ape_stack_memsz) ? ape_stack_memsz : 8 * 1024 * 1024;
ape_stack_align = DEFINED(ape_stack_align) ? ape_stack_align : 16;
ape_stack_round = -ape_stack_align;
_tls_size = _tbss_end - _tdata_start; _tls_size = _tbss_end - _tdata_start;
_tdata_size = _tdata_end - _tdata_start; _tdata_size = _tdata_end - _tdata_start;

View file

@ -585,7 +585,8 @@ ape_stack_vaddr = DEFINED(ape_stack_vaddr) ? ape_stack_vaddr : 0x700000000000;
ape_stack_paddr = ape_ram_paddr + ape_ram_filesz; ape_stack_paddr = ape_ram_paddr + ape_ram_filesz;
ape_stack_filesz = 0; ape_stack_filesz = 0;
ape_stack_memsz = DEFINED(ape_stack_memsz) ? ape_stack_memsz : 8 * 1024 * 1024; ape_stack_memsz = DEFINED(ape_stack_memsz) ? ape_stack_memsz : 8 * 1024 * 1024;
ape_stack_align = 16; ape_stack_align = DEFINED(ape_stack_align) ? ape_stack_align : 16;
ape_stack_round = -ape_stack_align;
ape_note_offset = ape_cod_offset + (ape_note - ape_cod_vaddr); ape_note_offset = ape_cod_offset + (ape_note - ape_cod_vaddr);
ape_note_filesz = ape_note_end - ape_note; ape_note_filesz = ape_note_end - ape_note;

View file

@ -68,14 +68,12 @@ _start:
mov %rsp,__oldstack(%rip) mov %rsp,__oldstack(%rip)
mov %rdx,__envp(%rip) mov %rdx,__envp(%rip)
// setup backtraces // setup stack
xor %ebp,%ebp xor %ebp,%ebp
and $ape_stack_round,%rsp
// make process stack (8mb) follow thread stack (256kb) alignment #if SupportsWindows() && !IsTiny()
and $-(256*1024),%rsp // make win32 imps crash
#if SupportsWindows()
// make win32 imps noop
.weak ape_idata_iat .weak ape_idata_iat
.weak ape_idata_iatend .weak ape_idata_iatend
.weak __oops_win32 .weak __oops_win32
@ -122,9 +120,12 @@ _start:
// this is the first argument to cosmo() below // this is the first argument to cosmo() below
mov x0,sp mov x0,sp
// make process stack (8mb) conform to thread stack (256kb) alignment // setup the stack
mov x1,sp mov x29,#0
and sp,x1,-(256*1024) mov x30,#0
ldr x1,=ape_stack_round
and x1,x0,x1
mov sp,x1
// second arg shall be struct Syslib passed by ape-m1.c // second arg shall be struct Syslib passed by ape-m1.c
// used to talk to apple's authoritarian libraries // used to talk to apple's authoritarian libraries

View file

@ -38,7 +38,6 @@ void __enable_tls(void);
void *__cxa_finalize(void *); void *__cxa_finalize(void *);
void __stack_chk_fail(void) wontreturn relegated; void __stack_chk_fail(void) wontreturn relegated;
void __stack_chk_fail_local(void) wontreturn relegated; void __stack_chk_fail_local(void) wontreturn relegated;
void _jmpstack(void *, void *, ...) wontreturn;
long _setstack(void *, void *, ...); long _setstack(void *, void *, ...);
int GetDosArgv(const char16_t *, char *, size_t, char **, size_t); int GetDosArgv(const char16_t *, char *, size_t, char **, size_t);
int GetDosEnviron(const char16_t *, char *, size_t, char **, size_t); int GetDosEnviron(const char16_t *, char *, size_t, char **, size_t);

View file

@ -1,37 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Switches stack.
//
// @param rdi is new rsp, passed as malloc(size) + size
// @param rsi is function to call in new stack space
// @param rdx,rcx,r8,r9 get passed as args to rsi
// @noreturn
_jmpstack:
mov %rdi,%rsp
mov %rsi,%rax
mov %rdx,%rdi
mov %rcx,%rsi
mov %r8,%rdx
mov %r9,%rcx
xor %ebp,%ebp
call *%rax
.unreachable
.endfn _jmpstack,globl,hidden

View file

@ -18,46 +18,18 @@
#define GetGuardSize() 16384 #define GetGuardSize() 16384
/** /**
* Tunes APE stack maximum size. * Align APE main thread stack at startup.
* *
* The bottom-most page will be protected to ensure your stack does not * You need this in your main program module:
* magically grow beyond this value. It's possible to detect stack
* overflows, by calling `ShowCrashReports()`. Your stack size must be a
* power of two; the linker will check this.
* *
* If you want to know how much stack your programs needs, then * STATIC_STACK_ALIGN(GetStackSize());
* *
* __static_yoink("stack_usage_logging"); * If you want to use GetStackAddr() and HaveStackMemory() safely on
* * your main thread in your process. It causes crt.S to waste a tiny
* will install an atexit() handler that appends to `o/$MODE/stack.log` * amount of memory to ensure those macros go extremely fast.
*
* @see libc/sysv/systemfive.S
* @see ape/ape.lds
*/ */
#define STATIC_STACK_SIZE(BYTES) \ #define STATIC_STACK_ALIGN(BYTES) \
_STACK_SYMBOL("ape_stack_memsz", _STACK_STRINGIFY(BYTES) _STACK_EXTRA) _STACK_SYMBOL("ape_stack_align", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
/**
* Tunes APE stack virtual address.
*
* This value must be aligned according to your stack size, and that's
* checked by your linker script. This defaults to `0x700000000000` so
*
* 1. It's easy to see how close you are to the bottom
* 2. The linker script error is unlikely to happen
*
* This macro will be respected, with two exceptions
*
* 1. In MODE=tiny the operating system provided stack is used instead
* 2. Windows 7 doesn't support 64-bit addresses, so we'll instead use
* `0x10000000 - GetStackSize()` as the stack address
*
* @see libc/sysv/systemfive.S
* @see libc/nt/winmain.greg.c
* @see ape/ape.lds
*/
#define STATIC_STACK_ADDR(ADDR) \
_STACK_SYMBOL("ape_stack_vaddr", _STACK_STRINGIFY(ADDR))
/** /**
* Makes program stack executable if declared, e.g. * Makes program stack executable if declared, e.g.
@ -78,7 +50,7 @@
#define _STACK_STRINGIFY(ADDR) #ADDR #define _STACK_STRINGIFY(ADDR) #ADDR
#define _STACK_SYMBOL(NAME, VALUE) \ #define _STACK_SYMBOL(NAME, VALUE) \
asm(".equ\t" NAME "," VALUE "\n\t" \ __asm__(".equ\t" NAME "," VALUE "\n\t" \
".globl\t" NAME) ".globl\t" NAME)
#ifdef __SANITIZE_ADDRESS__ #ifdef __SANITIZE_ADDRESS__
@ -95,29 +67,71 @@ extern char ape_stack_memsz[] __attribute__((__weak__));
extern char ape_stack_align[] __attribute__((__weak__)); extern char ape_stack_align[] __attribute__((__weak__));
/** /**
* Returns address of bottom of stack. * Returns address of bottom of current stack.
* *
* This takes into consideration threads and sigaltstack. This is * This always works on threads. If you want it to work on the main
* implemented as a fast pure expression, since we're able to make the * process too, then you'll need STATIC_STACK_ALIGN(GetStackSize())
* assumption that stack sizes are two powers and aligned. This is * which will burn O(256kb) of memory to ensure thread invariants.
* thanks to (1) the linker script checks the statically chosen sizes,
* and (2) the mmap() address picker will choose aligned addresses when
* the provided size is a two power.
*/ */
#define GetStackAddr() \ #define GetStackAddr() \
(((intptr_t)__builtin_frame_address(0) - 1) & -GetStackSize()) (((intptr_t)__builtin_frame_address(0) - 1) & -GetStackSize())
#define GetStaticStackSize() ((uintptr_t)ape_stack_memsz) #define GetStaticStackSize() ((uintptr_t)ape_stack_memsz)
/**
* Returns true if at least `n` bytes of stack are available.
*
* This always works on threads. If you want it to work on the main
* process too, then you'll need STATIC_STACK_ALIGN(GetStackSize())
* which will burn O(256kb) of memory to ensure thread invariants,
* which make this check exceedingly fast.
*/
#define HaveStackMemory(n) \
((intptr_t)__builtin_frame_address(0) >= \
GetStackAddr() + GetGuardSize() + (n))
/**
* Extends stack memory by poking large allocations.
*
* This can be particularly useful depending on how your system
* implements guard pages. For example, Windows can make stacks
* that aren't fully committed, in which case there's only 4096
* bytes of grows-down guard pages made by portable executable.
* If you alloca() more memory than that, you should call this,
* since it'll not only ensure stack overflows are detected, it
* will also trigger the stack to grow down safely.
*/
__funline void CheckLargeStackAllocation(void *p, ssize_t n) {
for (; n > 0; n -= 4096) {
((char *)p)[n - 1] = 0;
}
}
void *NewCosmoStack(void) vallocesque;
int FreeCosmoStack(void *) libcesque;
/**
* Tunes stack size of main thread on Windows.
*
* On UNIX systems use `RLIMIT_STACK` to tune the main thread size.
*/
#define STATIC_STACK_SIZE(BYTES) \
_STACK_SYMBOL("ape_stack_memsz", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
/**
* Tunes main thread stack address on Windows.
*/
#define STATIC_STACK_ADDR(ADDR) \
_STACK_SYMBOL("ape_stack_vaddr", _STACK_STRINGIFY(ADDR))
#ifdef __x86_64__ #ifdef __x86_64__
/** /**
* Returns preferred bottom address of stack. * Returns preferred bottom address of main thread stack.
* *
* This is the stack address of the main process. The only time that * On UNIX systems we favor the system provided stack, so this only
* isn't guaranteed to be the case is in MODE=tiny, since it doesn't * really applies to Windows. It's configurable at link time. It is
* link the code for stack creation at startup. This generally isn't * needed because polyfilling fork requires that we know precisely
* problematic, since MODE=tiny doesn't use any of the runtime codes * where the stack memory begins and ends.
* which want the stack to be cheaply knowable, e.g. ftrace, kprintf
*/ */
#define GetStaticStackAddr(ADDEND) \ #define GetStaticStackAddr(ADDEND) \
({ \ ({ \
@ -132,25 +146,6 @@ extern char ape_stack_align[] __attribute__((__weak__));
#define GetStaticStackAddr(ADDEND) (GetStackAddr() + ADDEND) #define GetStaticStackAddr(ADDEND) (GetStackAddr() + ADDEND)
#endif #endif
/**
* Returns true if at least `n` bytes of stack are available.
*/
#define HaveStackMemory(n) \
((intptr_t)__builtin_frame_address(0) >= \
GetStackAddr() + GetGuardSize() + (n))
/**
* Extends stack memory by poking large allocations.
*/
forceinline void CheckLargeStackAllocation(void *p, ssize_t n) {
for (; n > 0; n -= 4096) {
((char *)p)[n - 1] = 0;
}
}
void *NewCosmoStack(void) vallocesque;
int FreeCosmoStack(void *) libcesque;
COSMOPOLITAN_C_END_ COSMOPOLITAN_C_END_
#endif /* GNU ELF */ #endif /* GNU ELF */
#endif /* _COSMO_SOURCE */ #endif /* _COSMO_SOURCE */

View file

@ -27,6 +27,10 @@
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/sysv/consts/o.h" #include "libc/sysv/consts/o.h"
// TODO(jart): Delete?
STATIC_STACK_ALIGN(GetStackSize());
static char stacklog[1024]; static char stacklog[1024];
dontasan size_t GetStackUsage(char *s, size_t n) { dontasan size_t GetStackUsage(char *s, size_t n) {

View file

@ -77,7 +77,10 @@ __msabi extern typeof(SetStdHandle) *const __imp_SetStdHandle;
__msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect; __msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect;
// clang-format on // clang-format on
extern void cosmo(int, char **, char **, long (*)[2]) wontreturn; void cosmo(int, char **, char **, long (*)[2]) wontreturn;
void __switch_stacks(int, char **, char **, long (*)[2],
void (*)(int, char **, char **, long (*)[2]),
intptr_t) wontreturn;
static const signed char kNtStdio[3] = { static const signed char kNtStdio[3] = {
(signed char)kNtStdInputHandle, (signed char)kNtStdInputHandle,
@ -211,8 +214,8 @@ __msabi static textwindows wontreturn void WinInit(const char16_t *cmdline) {
__envp = &wa->envp[0]; __envp = &wa->envp[0];
// handover control to cosmopolitan runtime // handover control to cosmopolitan runtime
_jmpstack((char *)(stackaddr + (stacksize - sizeof(struct WinArgs))), cosmo, __switch_stacks(count, wa->argv, wa->envp, wa->auxv, cosmo,
count, wa->argv, wa->envp, wa->auxv); stackaddr + (stacksize - sizeof(struct WinArgs)));
} }
__msabi textwindows int64_t WinMain(int64_t hInstance, int64_t hPrevInstance, __msabi textwindows int64_t WinMain(int64_t hInstance, int64_t hPrevInstance,

View file

@ -112,6 +112,7 @@ long double
coshl(long double x) coshl(long double x)
{ {
long double hi,lo,x2,x4; long double hi,lo,x2,x4;
(void)x4;
#if LDBL_MANT_DIG == 113 #if LDBL_MANT_DIG == 113
double dx2; double dx2;
#endif #endif

View file

@ -108,6 +108,7 @@ long double
sinhl(long double x) sinhl(long double x)
{ {
long double hi,lo,x2,x4; long double hi,lo,x2,x4;
(void)x4;
#if LDBL_MANT_DIG == 113 #if LDBL_MANT_DIG == 113
double dx2; double dx2;
#endif #endif

View file

@ -135,6 +135,7 @@ long double
tanhl(long double x) tanhl(long double x)
{ {
long double hi,lo,s,x2,x4,z; long double hi,lo,s,x2,x4,z;
(void)x4;
#if LDBL_MANT_DIG == 113 #if LDBL_MANT_DIG == 113
double dx2; double dx2;
#endif #endif

View file

@ -57,7 +57,7 @@ Lua 5.4.3 (MIT License)\\n\
Copyright 1994–2021 Lua.org, PUC-Rio.\""); Copyright 1994–2021 Lua.org, PUC-Rio.\"");
asm(".include \"libc/disclaimer.inc\""); asm(".include \"libc/disclaimer.inc\"");
STATIC_STACK_SIZE(0x80000); STATIC_STACK_ALIGN(GetStackSize());
#if !defined(LUA_PROGNAME) #if !defined(LUA_PROGNAME)
#define LUA_PROGNAME "lua" #define LUA_PROGNAME "lua"

View file

@ -50,7 +50,6 @@ Lua 5.4.3 (MIT License)\\n\
Copyright 1994–2021 Lua.org, PUC-Rio.\""); Copyright 1994–2021 Lua.org, PUC-Rio.\"");
asm(".include \"libc/disclaimer.inc\""); asm(".include \"libc/disclaimer.inc\"");
static void PrintFunction(const Proto* f, int full); static void PrintFunction(const Proto* f, int full);
#define luaU_print PrintFunction #define luaU_print PrintFunction

View file

@ -4,11 +4,14 @@
Python 3 Python 3
https://docs.python.org/3/license.html │ https://docs.python.org/3/license.html │
*/ */
#include "libc/runtime/stack.h"
#include "third_party/python/Include/yoink.h" #include "third_party/python/Include/yoink.h"
#include "third_party/python/runpythonmodule.h" #include "third_party/python/runpythonmodule.h"
#include "tool/args/args.h" #include "tool/args/args.h"
// clang-format off // clang-format off
STATIC_STACK_ALIGN(GetStackSize());
PYTHON_YOINK("xed"); PYTHON_YOINK("xed");
PYTHON_YOINK("xterm"); PYTHON_YOINK("xterm");

View file

@ -142,7 +142,7 @@
#pragma GCC diagnostic ignored "-Wunused-variable" #pragma GCC diagnostic ignored "-Wunused-variable"
STATIC_STACK_SIZE(0x80000); STATIC_STACK_ALIGN(GetStackSize());
__static_yoink("zipos"); __static_yoink("zipos");