Rewrite brk() and sbrk()

This change reduces the .bss memory requirement for all executables by
O(64kb). The brk system calls are now fully tested and figured out and
might be useful for tiny programs that only target System Five.
This commit is contained in:
Justine Tunney 2022-10-01 23:11:56 -07:00
parent bc8532688b
commit 5005f2e446
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
23 changed files with 643 additions and 294 deletions

View file

@ -18,45 +18,122 @@
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/intrin/nopl.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/macros.internal.h"
#include "libc/runtime/brk.internal.h"
#include "libc/runtime/directmap.internal.h"
#include "libc/runtime/memtrack.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
uintptr_t __break;
struct Brk __brk;
/**
 * Checks whether byte range `[p,p+n)` touches a tracked memory interval.
 *
 * Both ends of the range are reduced to 64kb granule numbers (address
 * shifted right by 16) and compared against the `x` / `y` bounds of the
 * single interval that FindMemoryInterval() reports for the first
 * granule.
 *
 * @param p is the first byte of the range
 * @param n is the byte length of the range, which must be nonzero
 * @return true if the range intersects that interval
 */
static bool OverlapsMmappedMemory(unsigned char *p, size_t n) {
  int first, last, k;
  _unassert(n);
  first = (intptr_t)p >> 16;
  last = (intptr_t)(p + n - 1) >> 16;
  k = FindMemoryInterval(&_mmi, first);
  // nothing tracked at or beyond the first granule
  if (!(k < _mmi.i)) return false;
  return (first <= _mmi.p[k].x && _mmi.p[k].x <= last) ||  // starts inside
         (first <= _mmi.p[k].y && _mmi.p[k].y <= last) ||  // ends inside
         (_mmi.p[k].x <= first && last <= _mmi.p[k].y);    // encloses range
}
/**
 * Moves the program break to `p` without taking the brk mutex.
 *
 * The callers in this file invoke it between brk_lock() and
 * brk_unlock(). The requested address is rounded to PAGESIZE in the
 * direction away from the current break before anything else happens.
 *
 * @param p is the requested new break
 * @return null on success, or `(unsigned char *)-1` on failure; the
 *     failure paths go through enosys() / einval() / eexist() or a
 *     failed system call, which are expected to leave errno set
 */
static unsigned char *brk_unlocked(unsigned char *p) {
  int rc;
  struct DirectMap dm;
  _unassert(!((intptr_t)__brk.p & (PAGESIZE - 1)));
  if (p < __brk.p) {
    p = (unsigned char *)ROUNDDOWN((intptr_t)p, PAGESIZE);
  } else {
    p = (unsigned char *)ROUNDUP((intptr_t)p, PAGESIZE);
  }
  if (IsWindows()) {
    rc = enosys();
  } else if (p < _end) {
    rc = einval();
  } else if (p == __brk.p) {
    // break is already where the caller wants it
    rc = 0;
  } else if (p < __brk.p) {
    // shrinking: give back the pages between new and old break
    rc = sys_munmap(p, __brk.p - p);
  } else if (OverlapsMmappedMemory(__brk.p, p - __brk.p)) {
    // growing into memory that mmap() is already tracking
    rc = eexist();
  } else {
    // we always polyfill this system call because
    // 1. Linux has brk() but its behavior is poorly documented
    // 2. FreeBSD has sbrk(int):int but it's foreseeable it could go away
    // 3. XNU/OpenBSD/NetBSD have all deleted this interface in the kernel
    dm = sys_mmap(__brk.p, p - __brk.p, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    rc = (int)(intptr_t)dm.addr;  // safe b/c __brk.p is page-aligned
  }
  if (rc == -1) return (unsigned char *)-1;
  __brk.p = p;
  return 0;
}
/**
 * Acquires the mutex that guards `__brk`.
 *
 * @return 0 always, so the call can appear inside an expression
 */
int brk_lock(void) {
  pthread_mutex_lock(&__brk.m);
  return 0;
}

/**
 * Releases the mutex that guards `__brk`.
 */
void brk_unlock(void) {
  pthread_mutex_unlock(&__brk.m);
}

// From this point on, brk_lock() and brk_unlock() name the macros
// below; the function definitions above must therefore come first.
#ifdef _NOPL0
// NOTE(review): presumably _NOPL0 emits a call that stays inert until
// threads are enabled; confirm in libc/intrin/nopl.internal.h
#define brk_lock()   _NOPL0("__threadcalls", brk_lock)
#define brk_unlock() _NOPL0("__threadcalls", brk_unlock)
#else
// Only touch the mutex once __threaded is nonzero. Both arms of a
// conditional expression must have compatible types: brk_lock() yields
// int so its idle arm is 0, whereas brk_unlock() yields void so its
// idle arm has to be (void)0 rather than 0.
#define brk_lock()   (__threaded ? brk_lock() : 0)
#define brk_unlock() (__threaded ? brk_unlock() : (void)0)
#endif
/**
* Sets end of data section.
*
* This can be used to allocate and deallocate memory. It won't
* conflict with malloc() and mmap(NULL, ...) allocations since
* APE binaries load the image at 0x440000 and do allocations
* starting at 0x100080040000. You should consult _end, or call
* sbrk(NULL), to figure out where the existing break is first.
* Your program break starts right after end of `.bss` as defined
* by the external linker-defined variable `end`. Setting it to a
* higher address will allocate more memory. After using this you
* may deallocate memory by setting it back to a lower address.
*
* @return 0 on success or -1 w/ errno
* @see mmap(), sbrk(), _end
* The only virtue of brk(), and sbrk(), aside from compatibility
* with legacy software, is that it's tinier than mmap(): since this
* API only supports Unix, we don't bother doing the complex memory
* interval tracking that mmap() does.
*
* @param neu is the new end address of data segment, which shall
* be rounded automatically to a 4096-byte granularity
* @return 0 on success, or -1 w/ errno
* @raise EINVAL if `neu` is less than the `end` of `.bss`
* @raise EEXIST if expanded break would overlap existing mmap() memory
* @raise ENOMEM if `RLIMIT_DATA` / `RLIMIT_AS` / `RLIMIT_RSS` is exceeded
* @raise ENOSYS on Windows because WIN32 puts random stuff after your break
* @threadsafe
*/
int brk(void *end) {
int rc;
uintptr_t x;
if (!__break) __break = (uintptr_t)_end;
x = (uintptr_t)end;
if (x < (uintptr_t)_end) x = (uintptr_t)_end;
x = ROUNDUP(x, FRAMESIZE);
if (x == __break) return 0;
/* allocate one frame at a time due to nt pickiness */
for (; x > __break; __break += FRAMESIZE) {
if (mmap((void *)__break, FRAMESIZE, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) == MAP_FAILED) {
return -1;
}
}
for (rc = 0; x < __break; __break -= FRAMESIZE) {
rc |= munmap((void *)(__break - FRAMESIZE), FRAMESIZE);
}
return 0;
/**
 * Sets the end of the data section (program break) to `neu`.
 *
 * The break starts out at `_end` the first time brk() or sbrk() runs.
 * The actual move is delegated to brk_unlocked() while the brk mutex
 * is held.
 *
 * @param neu is the requested new break address
 * @return 0 on success, or -1 on failure
 */
int brk(void *neu) {
  int rc;
  brk_lock();
  // lazily place the break at the end of the image on first use
  if (!__brk.p) __brk.p = _end;
  // brk_unlocked() hands back a null pointer or (unsigned char *)-1;
  // fold it to int here so the %d in STRACE receives an int argument
  // instead of a pointer
  rc = (int)(intptr_t)brk_unlocked(neu);
  brk_unlock();
  STRACE("brk(%p) → %d% m", neu, rc);
  return rc;
}
/**
@ -64,14 +141,41 @@ int brk(void *end) {
*
* This shrinks or increases the program break by delta bytes. On
* success, the previous program break is returned. It's possible
* to pass zero to this function to get the current program break
* to pass 0 to this function to obtain the current program break
* which is initially set to the linker-defined external variable
* `end` which is the end of the `.bss` segment. Your allocations
* are rounded automatically to a 4096-byte granularity.
*
* @return old break on success or -1 w/ errno
* @see mmap(), brk(), _end
* The only virtue of sbrk(), and brk(), aside from compatibility
* with legacy software, is that it's tinier than mmap(): since this
* API only supports Unix, we don't bother doing the complex memory
* interval tracking that mmap() does.
*
* @param delta is the number of bytes to allocate (or free if negative)
* noting that your delta may be tuned to a number further from zero
* to accommodate the page size granularity of this allocator
* @return previous break on success, or `(void *)-1` w/ errno
* @raise EINVAL if new break would be less than the `end` of `.bss`
* @raise EEXIST if expanded break would overlap existing mmap() memory
* @raise EOVERFLOW if `delta` added to break overflows the address space
* @raise ENOMEM if `RLIMIT_DATA` / `RLIMIT_AS` / `RLIMIT_RSS` is exceeded
* @raise ENOSYS on Windows because WIN32 puts random stuff after your break
* @threadsafe
*/
void *sbrk(intptr_t delta) {
// NOTE(review): the next four statements use the older `__break` global
// and end in a return, so nothing after them would execute as written;
// they look like the pre-rewrite body that the code further down
// replaces. Confirm against the applied change.
uintptr_t oldbreak;
if (!__break) __break = (uintptr_t)_end;
oldbreak = __break;
return (void *)(brk((void *)(__break + delta)) != -1 ? oldbreak : -1);
intptr_t neu;
unsigned char *rc, *old;
// hold the brk mutex while __brk.p is read and moved
brk_lock();
// lazily place the break at `_end` on first use
if (!__brk.p) __brk.p = _end;
// remember the current break since that is what success returns
old = __brk.p;
// reject a delta whose sum with the break overflows intptr_t, or whose
// result IsLegalPointer() does not accept
if (!__builtin_add_overflow((intptr_t)__brk.p, delta, &neu) &&
IsLegalPointer((unsigned char *)neu)) {
rc = brk_unlocked((unsigned char *)neu);
// brk_unlocked() yields null on success; swap in the previous break
if (!rc) rc = old;
} else {
// eoverflow()'s result is cast to a pointer; the @raise EOVERFLOW doc
// above implies this is `(void *)-1` with errno set
rc = (void *)eoverflow();
}
brk_unlock();
STRACE("sbrk(%'ld) → %p% m", delta, rc);
return rc;
}

View file

@ -0,0 +1,16 @@
#ifndef COSMOPOLITAN_LIBC_RUNTIME_BRK_INTERNAL_H_
#define COSMOPOLITAN_LIBC_RUNTIME_BRK_INTERNAL_H_
#include "libc/thread/thread.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/**
 * State shared by brk() and sbrk().
 */
struct Brk {
// current program break; null until brk() or sbrk() first runs, at
// which point it is set to `_end`
unsigned char *p;
// mutex taken by brk_lock() and dropped by brk_unlock()
pthread_mutex_t m;
};
extern struct Brk __brk;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_RUNTIME_BRK_INTERNAL_H_ */

View file

@ -1,8 +1,6 @@
#ifndef COSMOPOLITAN_LIBC_RUNTIME_MEMTRACK_H_
#define COSMOPOLITAN_LIBC_RUNTIME_MEMTRACK_H_
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/intrin/midpoint.h"
#include "libc/intrin/nopl.internal.h"
#include "libc/macros.internal.h"
#include "libc/nt/version.h"
@ -31,9 +29,9 @@ struct MemoryInterval {
int y;
long h;
long size;
int prot;
int flags;
long offset;
int flags;
char prot;
bool iscow;
bool readonlyfile;
};
@ -44,20 +42,21 @@ struct MemoryIntervals {
struct MemoryInterval s[OPEN_MAX];
};
extern hidden struct MemoryIntervals _mmi;
extern struct MemoryIntervals _mmi;
void __mmi_lock(void) hidden;
void __mmi_unlock(void) hidden;
bool IsMemtracked(int, int) hidden;
void PrintSystemMappings(int) hidden;
bool AreMemoryIntervalsOk(const struct MemoryIntervals *) nosideeffect hidden;
void PrintMemoryIntervals(int, const struct MemoryIntervals *) hidden;
void __mmi_lock(void);
void __mmi_unlock(void);
bool IsMemtracked(int, int);
void PrintSystemMappings(int);
unsigned FindMemoryInterval(const struct MemoryIntervals *, int) nosideeffect;
bool AreMemoryIntervalsOk(const struct MemoryIntervals *) nosideeffect;
void PrintMemoryIntervals(int, const struct MemoryIntervals *);
int TrackMemoryInterval(struct MemoryIntervals *, int, int, long, int, int,
bool, bool, long, long) hidden;
bool, bool, long, long);
int ReleaseMemoryIntervals(struct MemoryIntervals *, int, int,
void (*)(struct MemoryIntervals *, int, int)) hidden;
void ReleaseMemoryNt(struct MemoryIntervals *, int, int) hidden;
int UntrackMemoryIntervals(void *, size_t) hidden;
void (*)(struct MemoryIntervals *, int, int));
void ReleaseMemoryNt(struct MemoryIntervals *, int, int);
int UntrackMemoryIntervals(void *, size_t);
size_t GetMemtrackSize(struct MemoryIntervals *);
#ifdef _NOPL0
@ -172,23 +171,6 @@ forceinline pureconst bool OverlapsShadowSpace(const void *p, size_t n) {
}
}
/**
 * Binary searches the interval table for the first entry whose `y`
 * bound is not less than `x`.
 *
 * The search assumes the first `mm->i` entries of `mm->p` are ordered
 * by their `y` field.
 *
 * @param mm is the interval table to search
 * @param x is the value compared against each entry's `y` field
 * @return index of the first entry with `y >= x`, or `mm->i` if no
 *     such entry exists
 */
forceinline unsigned FindMemoryInterval(const struct MemoryIntervals *mm,
                                        int x) {
  unsigned lo, hi, mid;
  for (lo = 0, hi = mm->i; lo < hi;) {
    mid = _midpoint(lo, hi);
    if (x > mm->p[mid].y) {
      lo = mid + 1;  // entry ends before x, so discard the lower half
    } else {
      hi = mid;  // entry may be the answer, so keep it in range
    }
  }
  assert(lo == mm->i || x <= mm->p[lo].y);
  return lo;
}
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_RUNTIME_MEMTRACK_H_ */

View file

@ -71,7 +71,7 @@ static wontreturn void OnUnrecoverableMmapError(const char *s) {
static noasan inline bool OverlapsExistingMapping(char *p, size_t n) {
int a, b, i;
assert(n > 0);
_unassert(n > 0);
a = FRAME(p);
b = FRAME(p + (n - 1));
i = FindMemoryInterval(&_mmi, a);
@ -86,7 +86,7 @@ static noasan inline bool OverlapsExistingMapping(char *p, size_t n) {
static noasan bool ChooseMemoryInterval(int x, int n, int align, int *res) {
// TODO: improve performance
int i, start, end;
assert(align > 0);
_unassert(align > 0);
if (_mmi.i) {
// find the start of the automap memory region
@ -202,8 +202,8 @@ static textwindows dontinline noasan void *MapMemories(char *addr, size_t size,
struct DirectMap dm;
bool iscow, readonlyfile;
m = (size_t)(n - 1) << 16;
assert(m < size);
assert(m + FRAMESIZE >= size);
_unassert(m < size);
_unassert(m + FRAMESIZE >= size);
oi = fd == -1 ? 0 : off + m;
sz = size - m;
dm = sys_mmap(addr + m, sz, prot, f, fd, oi);

View file

@ -17,7 +17,6 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/intrin/strace.internal.h"
#include "libc/calls/struct/rlimit.h"
#include "libc/calls/struct/sched_param.h"
#include "libc/calls/struct/sigset.h"
@ -31,6 +30,7 @@
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/promises.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/cpuid4.internal.h"
#include "libc/nexgen32e/kcpuids.h"
@ -139,8 +139,8 @@ static noasan void PrintDependencies(const char *prologue) {
do {
const struct NtLdrDataTableEntry *dll =
(const struct NtLdrDataTableEntry *)ldr;
PRINT(" ☼ %.*!hs (%'zukb)", dll->FullDllName.Length, dll->FullDllName.Data,
dll->SizeOfImage / 1024);
PRINT(" ☼ %.*!hs (%'zukb @ %p)", dll->FullDllName.Length,
dll->FullDllName.Data, dll->SizeOfImage / 1024, dll->DllBase);
} while ((ldr = ldr->Next) && ldr != head);
}

View file

@ -8,43 +8,43 @@ COSMOPOLITAN_C_START_
typedef long jmp_buf[8];
extern char **environ; /* CRT */
extern int __argc; /* CRT */
extern char **__argv; /* CRT */
extern char **__envp; /* CRT */
extern unsigned long *__auxv; /* CRT */
extern intptr_t __oldstack; /* CRT */
extern uint64_t __nosync; /* SYS */
extern _Atomic(int) __ftrace; /* SYS */
extern _Atomic(int) __strace; /* SYS */
extern char *program_invocation_name; /* RII */
extern char *program_invocation_short_name; /* RII */
extern uint64_t __syscount; /* RII */
extern uint64_t kStartTsc; /* RII */
extern char kTmpPath[]; /* RII */
extern const char kNtSystemDirectory[]; /* RII */
extern const char kNtWindowsDirectory[]; /* RII */
extern unsigned char _base[] forcealign(PAGESIZE); /* αpε */
extern unsigned char _ehead[] forcealign(PAGESIZE); /* αpε */
extern unsigned char _etext[] forcealign(PAGESIZE); /* αpε */
extern unsigned char _edata[] forcealign(PAGESIZE); /* αpε */
extern unsigned char _ezip[]; /* αpε */
extern unsigned char _end[] forcealign(FRAMESIZE); /* αpε */
extern unsigned char _ereal[]; /* αpε */
extern unsigned char __privileged_start[]; /* αpε */
extern unsigned char __privileged_addr[]; /* αpε */
extern unsigned char __privileged_size[]; /* αpε */
extern unsigned char __privileged_end[]; /* αpε */
extern unsigned char __test_start[]; /* αpε */
extern unsigned char __ro[]; /* αpε */
extern unsigned char *__relo_start[]; /* αpε */
extern unsigned char *__relo_end[]; /* αpε */
extern uint8_t __zip_start[]; /* αpε */
extern uint8_t __zip_end[]; /* αpε */
extern uint8_t __data_start[]; /* αpε */
extern uint8_t __data_end[]; /* αpε */
extern uint8_t __bss_start[]; /* αpε */
extern uint8_t __bss_end[]; /* αpε */
extern char **environ; /* CRT */
extern int __argc; /* CRT */
extern char **__argv; /* CRT */
extern char **__envp; /* CRT */
extern unsigned long *__auxv; /* CRT */
extern intptr_t __oldstack; /* CRT */
extern uint64_t __nosync; /* SYS */
extern _Atomic(int) __ftrace; /* SYS */
extern _Atomic(int) __strace; /* SYS */
extern char *program_invocation_name; /* RII */
extern char *program_invocation_short_name; /* RII */
extern uint64_t __syscount; /* RII */
extern uint64_t kStartTsc; /* RII */
extern char kTmpPath[]; /* RII */
extern const char kNtSystemDirectory[]; /* RII */
extern const char kNtWindowsDirectory[]; /* RII */
extern unsigned char _base[]; /* αpε */
extern unsigned char _ehead[]; /* αpε */
extern unsigned char _etext[]; /* αpε */
extern unsigned char _edata[]; /* αpε */
extern unsigned char _ezip[]; /* αpε */
extern unsigned char _end[]; /* αpε */
extern unsigned char _ereal[]; /* αpε */
extern unsigned char __privileged_start[]; /* αpε */
extern unsigned char __privileged_addr[]; /* αpε */
extern unsigned char __privileged_size[]; /* αpε */
extern unsigned char __privileged_end[]; /* αpε */
extern unsigned char __test_start[]; /* αpε */
extern unsigned char __ro[]; /* αpε */
extern unsigned char *__relo_start[]; /* αpε */
extern unsigned char *__relo_end[]; /* αpε */
extern uint8_t __zip_start[]; /* αpε */
extern uint8_t __zip_end[]; /* αpε */
extern uint8_t __data_start[]; /* αpε */
extern uint8_t __data_end[]; /* αpε */
extern uint8_t __bss_start[]; /* αpε */
extern uint8_t __bss_end[]; /* αpε */
extern size_t __virtualmax;
extern bool __isworker;