mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 06:53:33 +00:00
Reduce mandatory stack rss by 256kb
This commit is contained in:
parent
0e087143fd
commit
b592716d1c
15 changed files with 98 additions and 125 deletions
|
@ -280,6 +280,8 @@ SECTIONS {
|
|||
|
||||
ape_stack_vaddr = DEFINED(ape_stack_vaddr) ? ape_stack_vaddr : 0x700000000000;
|
||||
ape_stack_memsz = DEFINED(ape_stack_memsz) ? ape_stack_memsz : 8 * 1024 * 1024;
|
||||
ape_stack_align = DEFINED(ape_stack_align) ? ape_stack_align : 16;
|
||||
ape_stack_round = -ape_stack_align;
|
||||
|
||||
_tls_size = _tbss_end - _tdata_start;
|
||||
_tdata_size = _tdata_end - _tdata_start;
|
||||
|
|
|
@ -585,7 +585,8 @@ ape_stack_vaddr = DEFINED(ape_stack_vaddr) ? ape_stack_vaddr : 0x700000000000;
|
|||
ape_stack_paddr = ape_ram_paddr + ape_ram_filesz;
|
||||
ape_stack_filesz = 0;
|
||||
ape_stack_memsz = DEFINED(ape_stack_memsz) ? ape_stack_memsz : 8 * 1024 * 1024;
|
||||
ape_stack_align = 16;
|
||||
ape_stack_align = DEFINED(ape_stack_align) ? ape_stack_align : 16;
|
||||
ape_stack_round = -ape_stack_align;
|
||||
|
||||
ape_note_offset = ape_cod_offset + (ape_note - ape_cod_vaddr);
|
||||
ape_note_filesz = ape_note_end - ape_note;
|
||||
|
|
|
@ -68,14 +68,12 @@ _start:
|
|||
mov %rsp,__oldstack(%rip)
|
||||
mov %rdx,__envp(%rip)
|
||||
|
||||
// setup backtraces
|
||||
// setup stack
|
||||
xor %ebp,%ebp
|
||||
and $ape_stack_round,%rsp
|
||||
|
||||
// make process stack (8mb) follow thread stack (256kb) alignment
|
||||
and $-(256*1024),%rsp
|
||||
|
||||
#if SupportsWindows()
|
||||
// make win32 imps noop
|
||||
#if SupportsWindows() && !IsTiny()
|
||||
// make win32 imps crash
|
||||
.weak ape_idata_iat
|
||||
.weak ape_idata_iatend
|
||||
.weak __oops_win32
|
||||
|
@ -122,9 +120,12 @@ _start:
|
|||
// this is the first argument to cosmo() below
|
||||
mov x0,sp
|
||||
|
||||
// make process stack (8mb) conform to thread stack (256kb) alignment
|
||||
mov x1,sp
|
||||
and sp,x1,-(256*1024)
|
||||
// setup the stack
|
||||
mov x29,#0
|
||||
mov x30,#0
|
||||
ldr x1,=ape_stack_round
|
||||
and x1,x0,x1
|
||||
mov sp,x1
|
||||
|
||||
// second arg shall be struct Syslib passed by ape-m1.c
|
||||
// used to talk to apple's authoritarian libraries
|
||||
|
|
|
@ -38,7 +38,6 @@ void __enable_tls(void);
|
|||
void *__cxa_finalize(void *);
|
||||
void __stack_chk_fail(void) wontreturn relegated;
|
||||
void __stack_chk_fail_local(void) wontreturn relegated;
|
||||
void _jmpstack(void *, void *, ...) wontreturn;
|
||||
long _setstack(void *, void *, ...);
|
||||
int GetDosArgv(const char16_t *, char *, size_t, char **, size_t);
|
||||
int GetDosEnviron(const char16_t *, char *, size_t, char **, size_t);
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Switches stack.
|
||||
//
|
||||
// @param rdi is new rsp, passed as malloc(size) + size
|
||||
// @param rsi is function to call in new stack space
|
||||
// @param rdx,rcx,r8,r9 get passed as args to rsi
|
||||
// @noreturn
|
||||
_jmpstack:
|
||||
mov %rdi,%rsp
|
||||
mov %rsi,%rax
|
||||
mov %rdx,%rdi
|
||||
mov %rcx,%rsi
|
||||
mov %r8,%rdx
|
||||
mov %r9,%rcx
|
||||
xor %ebp,%ebp
|
||||
call *%rax
|
||||
.unreachable
|
||||
.endfn _jmpstack,globl,hidden
|
|
@ -18,46 +18,18 @@
|
|||
#define GetGuardSize() 16384
|
||||
|
||||
/**
|
||||
* Tunes APE stack maximum size.
|
||||
* Align APE main thread stack at startup.
|
||||
*
|
||||
* The bottom-most page will be protected to ensure your stack does not
|
||||
* magically grow beyond this value. It's possible to detect stack
|
||||
* overflows, by calling `ShowCrashReports()`. Your stack size must be a
|
||||
* power of two; the linker will check this.
|
||||
* You need this in your main program module:
|
||||
*
|
||||
* If you want to know how much stack your programs needs, then
|
||||
* STATIC_STACK_ALIGN(GetStackSize());
|
||||
*
|
||||
* __static_yoink("stack_usage_logging");
|
||||
*
|
||||
* will install an atexit() handler that appends to `o/$MODE/stack.log`
|
||||
*
|
||||
* @see libc/sysv/systemfive.S
|
||||
* @see ape/ape.lds
|
||||
* If you want to use GetStackAddr() and HaveStackMemory() safely on
|
||||
* your main thread in your process. It causes crt.S to waste a tiny
|
||||
* amount of memory to ensure those macros go extremely fast.
|
||||
*/
|
||||
#define STATIC_STACK_SIZE(BYTES) \
|
||||
_STACK_SYMBOL("ape_stack_memsz", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
|
||||
|
||||
/**
|
||||
* Tunes APE stack virtual address.
|
||||
*
|
||||
* This value must be aligned according to your stack size, and that's
|
||||
* checked by your linker script. This defaults to `0x700000000000` so
|
||||
*
|
||||
* 1. It's easy to see how close you are to the bottom
|
||||
* 2. The linker script error is unlikely to happen
|
||||
*
|
||||
* This macro will be respected, with two exceptions
|
||||
*
|
||||
* 1. In MODE=tiny the operating system provided stack is used instead
|
||||
* 2. Windows 7 doesn't support 64-bit addresses, so we'll instead use
|
||||
* `0x10000000 - GetStackSize()` as the stack address
|
||||
*
|
||||
* @see libc/sysv/systemfive.S
|
||||
* @see libc/nt/winmain.greg.c
|
||||
* @see ape/ape.lds
|
||||
*/
|
||||
#define STATIC_STACK_ADDR(ADDR) \
|
||||
_STACK_SYMBOL("ape_stack_vaddr", _STACK_STRINGIFY(ADDR))
|
||||
#define STATIC_STACK_ALIGN(BYTES) \
|
||||
_STACK_SYMBOL("ape_stack_align", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
|
||||
|
||||
/**
|
||||
* Makes program stack executable if declared, e.g.
|
||||
|
@ -78,7 +50,7 @@
|
|||
|
||||
#define _STACK_STRINGIFY(ADDR) #ADDR
|
||||
#define _STACK_SYMBOL(NAME, VALUE) \
|
||||
asm(".equ\t" NAME "," VALUE "\n\t" \
|
||||
__asm__(".equ\t" NAME "," VALUE "\n\t" \
|
||||
".globl\t" NAME)
|
||||
|
||||
#ifdef __SANITIZE_ADDRESS__
|
||||
|
@ -95,29 +67,71 @@ extern char ape_stack_memsz[] __attribute__((__weak__));
|
|||
extern char ape_stack_align[] __attribute__((__weak__));
|
||||
|
||||
/**
|
||||
* Returns address of bottom of stack.
|
||||
* Returns address of bottom of current stack.
|
||||
*
|
||||
* This takes into consideration threads and sigaltstack. This is
|
||||
* implemented as a fast pure expression, since we're able to make the
|
||||
* assumption that stack sizes are two powers and aligned. This is
|
||||
* thanks to (1) the linker script checks the statically chosen sizes,
|
||||
* and (2) the mmap() address picker will choose aligned addresses when
|
||||
* the provided size is a two power.
|
||||
* This always works on threads. If you want it to work on the main
|
||||
* process too, then you'll need STATIC_STACK_ALIGN(GetStackSize())
|
||||
* which will burn O(256kb) of memory to ensure thread invariants.
|
||||
*/
|
||||
#define GetStackAddr() \
|
||||
(((intptr_t)__builtin_frame_address(0) - 1) & -GetStackSize())
|
||||
|
||||
#define GetStaticStackSize() ((uintptr_t)ape_stack_memsz)
|
||||
|
||||
/**
|
||||
* Returns true if at least `n` bytes of stack are available.
|
||||
*
|
||||
* This always works on threads. If you want it to work on the main
|
||||
* process too, then you'll need STATIC_STACK_ALIGN(GetStackSize())
|
||||
* which will burn O(256kb) of memory to ensure thread invariants,
|
||||
* which make this check exceedingly fast.
|
||||
*/
|
||||
#define HaveStackMemory(n) \
|
||||
((intptr_t)__builtin_frame_address(0) >= \
|
||||
GetStackAddr() + GetGuardSize() + (n))
|
||||
|
||||
/**
|
||||
* Extends stack memory by poking large allocations.
|
||||
*
|
||||
* This can be particularly useful depending on how your system
|
||||
* implements guard pages. For example, Windows can make stacks
|
||||
* that aren't fully committed, in which case there's only 4096
|
||||
* bytes of grows-down guard pages made by portable executable.
|
||||
* If you alloca() more memory than that, you should call this,
|
||||
* since it'll not only ensure stack overflows are detected, it
|
||||
* will also trigger the stack to grow down safely.
|
||||
*/
|
||||
__funline void CheckLargeStackAllocation(void *p, ssize_t n) {
|
||||
for (; n > 0; n -= 4096) {
|
||||
((char *)p)[n - 1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void *NewCosmoStack(void) vallocesque;
|
||||
int FreeCosmoStack(void *) libcesque;
|
||||
|
||||
/**
|
||||
* Tunes stack size of main thread on Windows.
|
||||
*
|
||||
* On UNIX systems use `RLIMIT_STACK` to tune the main thread size.
|
||||
*/
|
||||
#define STATIC_STACK_SIZE(BYTES) \
|
||||
_STACK_SYMBOL("ape_stack_memsz", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
|
||||
|
||||
/**
|
||||
* Tunes main thread stack address on Windows.
|
||||
*/
|
||||
#define STATIC_STACK_ADDR(ADDR) \
|
||||
_STACK_SYMBOL("ape_stack_vaddr", _STACK_STRINGIFY(ADDR))
|
||||
|
||||
#ifdef __x86_64__
|
||||
/**
|
||||
* Returns preferred bottom address of stack.
|
||||
* Returns preferred bottom address of main thread stack.
|
||||
*
|
||||
* This is the stack address of the main process. The only time that
|
||||
* isn't guaranteed to be the case is in MODE=tiny, since it doesn't
|
||||
* link the code for stack creation at startup. This generally isn't
|
||||
* problematic, since MODE=tiny doesn't use any of the runtime codes
|
||||
* which want the stack to be cheaply knowable, e.g. ftrace, kprintf
|
||||
* On UNIX systems we favor the system provided stack, so this only
|
||||
* really applies to Windows. It's configurable at link time. It is
|
||||
* needed because polyfilling fork requires that we know, precisely
|
||||
* where the stack memory begins and ends.
|
||||
*/
|
||||
#define GetStaticStackAddr(ADDEND) \
|
||||
({ \
|
||||
|
@ -132,25 +146,6 @@ extern char ape_stack_align[] __attribute__((__weak__));
|
|||
#define GetStaticStackAddr(ADDEND) (GetStackAddr() + ADDEND)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns true if at least `n` bytes of stack are available.
|
||||
*/
|
||||
#define HaveStackMemory(n) \
|
||||
((intptr_t)__builtin_frame_address(0) >= \
|
||||
GetStackAddr() + GetGuardSize() + (n))
|
||||
|
||||
/**
|
||||
* Extends stack memory by poking large allocations.
|
||||
*/
|
||||
forceinline void CheckLargeStackAllocation(void *p, ssize_t n) {
|
||||
for (; n > 0; n -= 4096) {
|
||||
((char *)p)[n - 1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void *NewCosmoStack(void) vallocesque;
|
||||
int FreeCosmoStack(void *) libcesque;
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* GNU ELF */
|
||||
#endif /* _COSMO_SOURCE */
|
||||
|
|
|
@ -27,6 +27,10 @@
|
|||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/o.h"
|
||||
|
||||
// TODO(jart): Delete?
|
||||
|
||||
STATIC_STACK_ALIGN(GetStackSize());
|
||||
|
||||
static char stacklog[1024];
|
||||
|
||||
dontasan size_t GetStackUsage(char *s, size_t n) {
|
||||
|
|
|
@ -77,7 +77,10 @@ __msabi extern typeof(SetStdHandle) *const __imp_SetStdHandle;
|
|||
__msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect;
|
||||
// clang-format on
|
||||
|
||||
extern void cosmo(int, char **, char **, long (*)[2]) wontreturn;
|
||||
void cosmo(int, char **, char **, long (*)[2]) wontreturn;
|
||||
void __switch_stacks(int, char **, char **, long (*)[2],
|
||||
void (*)(int, char **, char **, long (*)[2]),
|
||||
intptr_t) wontreturn;
|
||||
|
||||
static const signed char kNtStdio[3] = {
|
||||
(signed char)kNtStdInputHandle,
|
||||
|
@ -211,8 +214,8 @@ __msabi static textwindows wontreturn void WinInit(const char16_t *cmdline) {
|
|||
__envp = &wa->envp[0];
|
||||
|
||||
// handover control to cosmopolitan runtime
|
||||
_jmpstack((char *)(stackaddr + (stacksize - sizeof(struct WinArgs))), cosmo,
|
||||
count, wa->argv, wa->envp, wa->auxv);
|
||||
__switch_stacks(count, wa->argv, wa->envp, wa->auxv, cosmo,
|
||||
stackaddr + (stacksize - sizeof(struct WinArgs)));
|
||||
}
|
||||
|
||||
__msabi textwindows int64_t WinMain(int64_t hInstance, int64_t hPrevInstance,
|
||||
|
|
|
@ -112,6 +112,7 @@ long double
|
|||
coshl(long double x)
|
||||
{
|
||||
long double hi,lo,x2,x4;
|
||||
(void)x4;
|
||||
#if LDBL_MANT_DIG == 113
|
||||
double dx2;
|
||||
#endif
|
||||
|
|
|
@ -108,6 +108,7 @@ long double
|
|||
sinhl(long double x)
|
||||
{
|
||||
long double hi,lo,x2,x4;
|
||||
(void)x4;
|
||||
#if LDBL_MANT_DIG == 113
|
||||
double dx2;
|
||||
#endif
|
||||
|
|
|
@ -135,6 +135,7 @@ long double
|
|||
tanhl(long double x)
|
||||
{
|
||||
long double hi,lo,s,x2,x4,z;
|
||||
(void)x4;
|
||||
#if LDBL_MANT_DIG == 113
|
||||
double dx2;
|
||||
#endif
|
||||
|
|
2
third_party/lua/lua.main.c
vendored
2
third_party/lua/lua.main.c
vendored
|
@ -57,7 +57,7 @@ Lua 5.4.3 (MIT License)\\n\
|
|||
Copyright 1994–2021 Lua.org, PUC-Rio.\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
|
||||
STATIC_STACK_SIZE(0x80000);
|
||||
STATIC_STACK_ALIGN(GetStackSize());
|
||||
|
||||
#if !defined(LUA_PROGNAME)
|
||||
#define LUA_PROGNAME "lua"
|
||||
|
|
1
third_party/lua/luac.main.c
vendored
1
third_party/lua/luac.main.c
vendored
|
@ -50,7 +50,6 @@ Lua 5.4.3 (MIT License)\\n\
|
|||
Copyright 1994–2021 Lua.org, PUC-Rio.\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
|
||||
|
||||
static void PrintFunction(const Proto* f, int full);
|
||||
#define luaU_print PrintFunction
|
||||
|
||||
|
|
3
third_party/python/python.c
vendored
3
third_party/python/python.c
vendored
|
@ -4,11 +4,14 @@
|
|||
│ Python 3 │
|
||||
│ https://docs.python.org/3/license.html │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/runtime/stack.h"
|
||||
#include "third_party/python/Include/yoink.h"
|
||||
#include "third_party/python/runpythonmodule.h"
|
||||
#include "tool/args/args.h"
|
||||
// clang-format off
|
||||
|
||||
STATIC_STACK_ALIGN(GetStackSize());
|
||||
|
||||
PYTHON_YOINK("xed");
|
||||
PYTHON_YOINK("xterm");
|
||||
|
||||
|
|
|
@ -142,7 +142,7 @@
|
|||
|
||||
#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||
|
||||
STATIC_STACK_SIZE(0x80000);
|
||||
STATIC_STACK_ALIGN(GetStackSize());
|
||||
|
||||
__static_yoink("zipos");
|
||||
|
||||
|
|
Loading…
Reference in a new issue