mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-22 21:32:31 +00:00
Reduce mandatory stack rss by 256kb
This commit is contained in:
parent
0e087143fd
commit
b592716d1c
15 changed files with 98 additions and 125 deletions
|
@ -38,7 +38,6 @@ void __enable_tls(void);
|
|||
void *__cxa_finalize(void *);
|
||||
void __stack_chk_fail(void) wontreturn relegated;
|
||||
void __stack_chk_fail_local(void) wontreturn relegated;
|
||||
void _jmpstack(void *, void *, ...) wontreturn;
|
||||
long _setstack(void *, void *, ...);
|
||||
int GetDosArgv(const char16_t *, char *, size_t, char **, size_t);
|
||||
int GetDosEnviron(const char16_t *, char *, size_t, char **, size_t);
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Switches stack.
|
||||
//
|
||||
// @param rdi is new rsp, passed as malloc(size) + size
|
||||
// @param rsi is function to call in new stack space
|
||||
// @param rdx,rcx,r8,r9 get passed as args to rsi
|
||||
// @noreturn
|
||||
_jmpstack:
|
||||
mov %rdi,%rsp
|
||||
mov %rsi,%rax
|
||||
mov %rdx,%rdi
|
||||
mov %rcx,%rsi
|
||||
mov %r8,%rdx
|
||||
mov %r9,%rcx
|
||||
xor %ebp,%ebp
|
||||
call *%rax
|
||||
.unreachable
|
||||
.endfn _jmpstack,globl,hidden
|
|
@ -18,46 +18,18 @@
|
|||
#define GetGuardSize() 16384
|
||||
|
||||
/**
|
||||
* Tunes APE stack maximum size.
|
||||
* Align APE main thread stack at startup.
|
||||
*
|
||||
* The bottom-most page will be protected to ensure your stack does not
|
||||
* magically grow beyond this value. It's possible to detect stack
|
||||
* overflows, by calling `ShowCrashReports()`. Your stack size must be a
|
||||
* power of two; the linker will check this.
|
||||
* You need this in your main program module:
|
||||
*
|
||||
* If you want to know how much stack your programs needs, then
|
||||
* STATIC_STACK_ALIGN(GetStackSize());
|
||||
*
|
||||
* __static_yoink("stack_usage_logging");
|
||||
*
|
||||
* will install an atexit() handler that appends to `o/$MODE/stack.log`
|
||||
*
|
||||
* @see libc/sysv/systemfive.S
|
||||
* @see ape/ape.lds
|
||||
* If you want to use GetStackAddr() and HaveStackMemory() safely on
|
||||
* your main thread in your process. It causes crt.S to waste a tiny
|
||||
* amount of memory to ensure those macros go extremely fast.
|
||||
*/
|
||||
#define STATIC_STACK_SIZE(BYTES) \
|
||||
_STACK_SYMBOL("ape_stack_memsz", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
|
||||
|
||||
/**
|
||||
* Tunes APE stack virtual address.
|
||||
*
|
||||
* This value must be aligned according to your stack size, and that's
|
||||
* checked by your linker script. This defaults to `0x700000000000` so
|
||||
*
|
||||
* 1. It's easy to see how close you are to the bottom
|
||||
* 2. The linker script error is unlikely to happen
|
||||
*
|
||||
* This macro will be respected, with two exceptions
|
||||
*
|
||||
* 1. In MODE=tiny the operating system provided stack is used instead
|
||||
* 2. Windows 7 doesn't support 64-bit addresses, so we'll instead use
|
||||
* `0x10000000 - GetStackSize()` as the stack address
|
||||
*
|
||||
* @see libc/sysv/systemfive.S
|
||||
* @see libc/nt/winmain.greg.c
|
||||
* @see ape/ape.lds
|
||||
*/
|
||||
#define STATIC_STACK_ADDR(ADDR) \
|
||||
_STACK_SYMBOL("ape_stack_vaddr", _STACK_STRINGIFY(ADDR))
|
||||
#define STATIC_STACK_ALIGN(BYTES) \
|
||||
_STACK_SYMBOL("ape_stack_align", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
|
||||
|
||||
/**
|
||||
* Makes program stack executable if declared, e.g.
|
||||
|
@ -77,9 +49,9 @@
|
|||
#define STATIC_EXEC_STACK() _STACK_SYMBOL("ape_stack_pf", "7")
|
||||
|
||||
#define _STACK_STRINGIFY(ADDR) #ADDR
|
||||
#define _STACK_SYMBOL(NAME, VALUE) \
|
||||
asm(".equ\t" NAME "," VALUE "\n\t" \
|
||||
".globl\t" NAME)
|
||||
#define _STACK_SYMBOL(NAME, VALUE) \
|
||||
__asm__(".equ\t" NAME "," VALUE "\n\t" \
|
||||
".globl\t" NAME)
|
||||
|
||||
#ifdef __SANITIZE_ADDRESS__
|
||||
#define _STACK_EXTRA "*2"
|
||||
|
@ -95,29 +67,71 @@ extern char ape_stack_memsz[] __attribute__((__weak__));
|
|||
extern char ape_stack_align[] __attribute__((__weak__));
|
||||
|
||||
/**
|
||||
* Returns address of bottom of stack.
|
||||
* Returns address of bottom of current stack.
|
||||
*
|
||||
* This takes into consideration threads and sigaltstack. This is
|
||||
* implemented as a fast pure expression, since we're able to make the
|
||||
* assumption that stack sizes are two powers and aligned. This is
|
||||
* thanks to (1) the linker script checks the statically chosen sizes,
|
||||
* and (2) the mmap() address picker will choose aligned addresses when
|
||||
* the provided size is a two power.
|
||||
* This always works on threads. If you want it to work on the main
|
||||
* process too, then you'll need STATIC_STACK_ALIGN(GetStackSize())
|
||||
* which will burn O(256kb) of memory to ensure thread invariants.
|
||||
*/
|
||||
#define GetStackAddr() \
|
||||
(((intptr_t)__builtin_frame_address(0) - 1) & -GetStackSize())
|
||||
|
||||
#define GetStaticStackSize() ((uintptr_t)ape_stack_memsz)
|
||||
|
||||
/**
|
||||
* Returns true if at least `n` bytes of stack are available.
|
||||
*
|
||||
* This always works on threads. If you want it to work on the main
|
||||
* process too, then you'll need STATIC_STACK_ALIGN(GetStackSize())
|
||||
* which will burn O(256kb) of memory to ensure thread invariants,
|
||||
* which make this check exceedingly fast.
|
||||
*/
|
||||
#define HaveStackMemory(n) \
|
||||
((intptr_t)__builtin_frame_address(0) >= \
|
||||
GetStackAddr() + GetGuardSize() + (n))
|
||||
|
||||
/**
|
||||
* Extends stack memory by poking large allocations.
|
||||
*
|
||||
* This can be particularly useful depending on how your system
|
||||
* implements guard pages. For example, Windows can make stacks
|
||||
* that aren't fully committed, in which case there's only 4096
|
||||
* bytes of grows-down guard pages made by portable executable.
|
||||
* If you alloca() more memory than that, you should call this,
|
||||
* since it'll not only ensure stack overflows are detected, it
|
||||
* will also trigger the stack to grow down safely.
|
||||
*/
|
||||
__funline void CheckLargeStackAllocation(void *p, ssize_t n) {
|
||||
for (; n > 0; n -= 4096) {
|
||||
((char *)p)[n - 1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void *NewCosmoStack(void) vallocesque;
|
||||
int FreeCosmoStack(void *) libcesque;
|
||||
|
||||
/**
|
||||
* Tunes stack size of main thread on Windows.
|
||||
*
|
||||
* On UNIX systems use `RLIMIT_STACK` to tune the main thread size.
|
||||
*/
|
||||
#define STATIC_STACK_SIZE(BYTES) \
|
||||
_STACK_SYMBOL("ape_stack_memsz", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
|
||||
|
||||
/**
|
||||
* Tunes main thread stack address on Windows.
|
||||
*/
|
||||
#define STATIC_STACK_ADDR(ADDR) \
|
||||
_STACK_SYMBOL("ape_stack_vaddr", _STACK_STRINGIFY(ADDR))
|
||||
|
||||
#ifdef __x86_64__
|
||||
/**
|
||||
* Returns preferred bottom address of stack.
|
||||
* Returns preferred bottom address of main thread stack.
|
||||
*
|
||||
* This is the stack address of the main process. The only time that
|
||||
* isn't guaranteed to be the case is in MODE=tiny, since it doesn't
|
||||
* link the code for stack creation at startup. This generally isn't
|
||||
* problematic, since MODE=tiny doesn't use any of the runtime codes
|
||||
* which want the stack to be cheaply knowable, e.g. ftrace, kprintf
|
||||
* On UNIX systems we favor the system provided stack, so this only
|
||||
* really applies to Windows. It's configurable at link time. It is
|
||||
* needed because polyfilling fork requires that we know precisely
|
||||
* where the stack memory begins and ends.
|
||||
*/
|
||||
#define GetStaticStackAddr(ADDEND) \
|
||||
({ \
|
||||
|
@ -132,25 +146,6 @@ extern char ape_stack_align[] __attribute__((__weak__));
|
|||
#define GetStaticStackAddr(ADDEND) (GetStackAddr() + ADDEND)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns true if at least `n` bytes of stack are available.
|
||||
*/
|
||||
#define HaveStackMemory(n) \
|
||||
((intptr_t)__builtin_frame_address(0) >= \
|
||||
GetStackAddr() + GetGuardSize() + (n))
|
||||
|
||||
/**
|
||||
* Extends stack memory by poking large allocations.
|
||||
*/
|
||||
forceinline void CheckLargeStackAllocation(void *p, ssize_t n) {
|
||||
for (; n > 0; n -= 4096) {
|
||||
((char *)p)[n - 1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void *NewCosmoStack(void) vallocesque;
|
||||
int FreeCosmoStack(void *) libcesque;
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* GNU ELF */
|
||||
#endif /* _COSMO_SOURCE */
|
||||
|
|
|
@ -27,6 +27,10 @@
|
|||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/o.h"
|
||||
|
||||
// TODO(jart): Delete?
|
||||
|
||||
STATIC_STACK_ALIGN(GetStackSize());
|
||||
|
||||
static char stacklog[1024];
|
||||
|
||||
dontasan size_t GetStackUsage(char *s, size_t n) {
|
||||
|
|
|
@ -77,7 +77,10 @@ __msabi extern typeof(SetStdHandle) *const __imp_SetStdHandle;
|
|||
__msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect;
|
||||
// clang-format on
|
||||
|
||||
extern void cosmo(int, char **, char **, long (*)[2]) wontreturn;
|
||||
void cosmo(int, char **, char **, long (*)[2]) wontreturn;
|
||||
void __switch_stacks(int, char **, char **, long (*)[2],
|
||||
void (*)(int, char **, char **, long (*)[2]),
|
||||
intptr_t) wontreturn;
|
||||
|
||||
static const signed char kNtStdio[3] = {
|
||||
(signed char)kNtStdInputHandle,
|
||||
|
@ -211,8 +214,8 @@ __msabi static textwindows wontreturn void WinInit(const char16_t *cmdline) {
|
|||
__envp = &wa->envp[0];
|
||||
|
||||
// handover control to cosmopolitan runtime
|
||||
_jmpstack((char *)(stackaddr + (stacksize - sizeof(struct WinArgs))), cosmo,
|
||||
count, wa->argv, wa->envp, wa->auxv);
|
||||
__switch_stacks(count, wa->argv, wa->envp, wa->auxv, cosmo,
|
||||
stackaddr + (stacksize - sizeof(struct WinArgs)));
|
||||
}
|
||||
|
||||
__msabi textwindows int64_t WinMain(int64_t hInstance, int64_t hPrevInstance,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue