cosmopolitan/libc/runtime/mmap.c

501 lines
18 KiB
C
Raw Normal View History

2020-06-15 14:18:57 +00:00
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
2020-12-28 01:18:44 +00:00
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
2020-06-15 14:18:57 +00:00
2020-12-28 01:18:44 +00:00
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
2020-06-15 14:18:57 +00:00
*/
#include "libc/assert.h"
#include "libc/bits/likely.h"
2020-09-03 12:44:37 +00:00
#include "libc/bits/weaken.h"
2020-06-15 14:18:57 +00:00
#include "libc/calls/calls.h"
#include "libc/calls/internal.h"
#include "libc/calls/strace.internal.h"
2022-05-23 22:06:11 +00:00
#include "libc/calls/syscall-sysv.internal.h"
2020-06-15 14:18:57 +00:00
#include "libc/dce.h"
2021-10-15 02:36:49 +00:00
#include "libc/errno.h"
#include "libc/intrin/asan.internal.h"
#include "libc/intrin/asancodes.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/spinlock.h"
#include "libc/limits.h"
Make numerous improvements - Python static hello world now 1.8mb - Python static fully loaded now 10mb - Python HTTPS client now uses MbedTLS - Python REPL now completes import stmts - Increase stack size for Python for now - Begin synthesizing posixpath and ntpath - Restore Python \N{UNICODE NAME} support - Restore Python NFKD symbol normalization - Add optimized code path for Intel SHA-NI - Get more Python unit tests passing faster - Get Python help() pagination working on NT - Python hashlib now supports MbedTLS PBKDF2 - Make memcpy/memmove/memcmp/bcmp/etc. faster - Add Mersenne Twister and Vigna to LIBC_RAND - Provide privileged __printf() for error code - Fix zipos opendir() so that it reports ENOTDIR - Add basic chmod() implementation for Windows NT - Add Cosmo's best functions to Python cosmo module - Pin function trace indent depth to that of caller - Show memory diagram on invalid access in MODE=dbg - Differentiate stack overflow on crash in MODE=dbg - Add stb_truetype and tools for analyzing font files - Upgrade to UNICODE 13 and reduce its binary footprint - COMPILE.COM now logs resource usage of build commands - Start implementing basic poll() support on bare metal - Set getauxval(AT_EXECFN) to GetModuleFileName() on NT - Add descriptions to strerror() in non-TINY build modes - Add COUNTBRANCH() macro to help with micro-optimizations - Make error / backtrace / asan / memory code more unbreakable - Add fast perfect C implementation of μ-Law and a-Law audio codecs - Make strtol() functions consistent with other libc implementations - Improve Linenoise implementation (see also github.com/jart/bestline) - COMPILE.COM now suppresses stdout/stderr of successful build commands
2021-09-28 05:58:51 +00:00
#include "libc/log/backtrace.internal.h"
2021-10-15 02:36:49 +00:00
#include "libc/log/libfatal.internal.h"
#include "libc/log/log.h"
#include "libc/macros.internal.h"
#include "libc/nt/process.h"
#include "libc/nt/runtime.h"
#include "libc/nt/struct/processmemorycounters.h"
#include "libc/rand/rand.h"
#include "libc/runtime/directmap.internal.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/memtrack.internal.h"
#include "libc/runtime/runtime.h"
2020-06-15 14:18:57 +00:00
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h"
2020-06-15 14:18:57 +00:00
#include "libc/sysv/errfuns.h"
#define MAP_ANONYMOUS_linux 0x00000020
#define MAP_ANONYMOUS_openbsd 0x00001000
#define MAP_GROWSDOWN_linux 0x00000100
#define MAP_STACK_freebsd 0x00000400
#define MAP_STACK_openbsd 0x00004000
#define IP(X) (intptr_t)(X)
#define VIP(X) (void *)IP(X)
#define ALIGNED(p) (!(IP(p) & (FRAMESIZE - 1)))
#define ADDR(x) ((int64_t)((uint64_t)(x) << 32) >> 16)
#define SHADE(x) (((intptr_t)(x) >> 3) + 0x7fff8000)
#define FRAME(x) ((int)((intptr_t)(x) >> 16))
static wontreturn void OnUnrecoverableMmapError(const char *s) {
if (weaken(__die)) weaken(__die)();
STRACE("%s %m", s);
__restorewintty();
_Exit(199);
}
static noasan inline bool OverlapsExistingMapping(char *p, size_t n) {
int a, b, i;
assert(n > 0);
a = FRAME(p);
b = FRAME(p + (n - 1));
i = FindMemoryInterval(&_mmi, a);
if (i < _mmi.i) {
if (a <= _mmi.p[i].x && _mmi.p[i].x <= b) return true;
if (a <= _mmi.p[i].y && _mmi.p[i].y <= b) return true;
if (_mmi.p[i].x <= a && b <= _mmi.p[i].y) return true;
}
return false;
}
static noasan bool ChooseMemoryInterval(int x, int n, int align, int *res) {
int i, start, end;
assert(align > 0);
if (_mmi.i) {
// find the start of the automap memory region
i = FindMemoryInterval(&_mmi, x);
if (i < _mmi.i) {
// check to see if there's space available before the first entry
if (!__builtin_add_overflow(x, align - 1, &start)) {
start &= -align;
if (!__builtin_add_overflow(start, n - 1, &end)) {
if (end < _mmi.p[i].x) {
*res = start;
return true;
}
}
}
// check to see if there's space available between two entries
while (++i < _mmi.i) {
if (!__builtin_add_overflow(_mmi.p[i - 1].y, 1, &start) &&
!__builtin_add_overflow(start, align - 1, &start)) {
start &= -align;
if (!__builtin_add_overflow(start, n - 1, &end)) {
if (end < _mmi.p[i].x) {
*res = start;
return true;
}
}
}
}
}
// otherwise append after the last entry if space is available
if (!__builtin_add_overflow(_mmi.p[i - 1].y, 1, &start) &&
!__builtin_add_overflow(start, align - 1, &start)) {
start &= -align;
if (!__builtin_add_overflow(start, n - 1, &end)) {
*res = start;
return true;
}
}
} else {
// if memtrack is empty, then just assign the requested address
// assuming it doesn't overflow
if (!__builtin_add_overflow(x, align - 1, &start)) {
start &= -align;
if (!__builtin_add_overflow(start, n - 1, &end)) {
*res = start;
return true;
}
}
}
return false;
}
noasan static bool Automap(int count, int align, int *res) {
return ChooseMemoryInterval(FRAME(kAutomapStart), count, align, res) &&
*res + count <= FRAME(kAutomapStart + (kAutomapSize - 1));
}
2020-06-15 14:18:57 +00:00
noasan static size_t GetMemtrackSize(struct MemoryIntervals *mm) {
size_t i, n;
for (n = i = 0; i < mm->i; ++i) {
n += ((size_t)(mm->p[i].y - mm->p[i].x) + 1) << 16;
}
return n;
}
static noasan void *FinishMemory(void *addr, size_t size, int prot, int flags,
int fd, int64_t off, int f, int x, int n,
struct DirectMap dm) {
2021-10-15 02:36:49 +00:00
if (!IsWindows() && (flags & MAP_FIXED)) {
if (UntrackMemoryIntervals(addr, size)) {
OnUnrecoverableMmapError("FIXED UNTRACK FAILED");
2021-10-15 02:36:49 +00:00
}
}
if (TrackMemoryInterval(&_mmi, x, x + (n - 1), dm.maphandle, prot, flags,
false, false, off, size)) {
2021-10-15 02:36:49 +00:00
if (sys_munmap(addr, n) == -1) {
OnUnrecoverableMmapError("TRACK MUNMAP FAILED");
2021-10-15 02:36:49 +00:00
}
return MAP_FAILED;
}
if (weaken(__asan_map_shadow) && !OverlapsShadowSpace(addr, size)) {
weaken(__asan_map_shadow)((intptr_t)addr, size);
}
return addr;
}
static noasan void *MapMemory(void *addr, size_t size, int prot, int flags,
int fd, int64_t off, int f, int x, int n) {
struct DirectMap dm;
dm = sys_mmap(addr, size, prot, f, fd, off);
if (UNLIKELY(dm.addr == MAP_FAILED)) {
if (IsWindows() && (flags & MAP_FIXED)) {
OnUnrecoverableMmapError(
"can't recover from MAP_FIXED errors on Windows");
}
return MAP_FAILED;
}
if (UNLIKELY(dm.addr != addr)) {
OnUnrecoverableMmapError("KERNEL DIDN'T RESPECT MAP_FIXED");
}
return FinishMemory(addr, size, prot, flags, fd, off, f, x, n, dm);
}
2021-10-15 02:36:49 +00:00
/**
* Maps memory from system, one frame at a time.
*
* This is useful on Windows since it allows us to partially unmap or
* punch holes into existing mappings.
*/
static textwindows dontinline noasan void *MapMemories(char *addr, size_t size,
int prot, int flags,
int fd, int64_t off,
int f, int x, int n) {
size_t i, m;
int64_t oi, sz;
2021-10-15 02:36:49 +00:00
struct DirectMap dm;
bool iscow, readonlyfile;
m = (size_t)(n - 1) << 16;
assert(m < size);
assert(m + FRAMESIZE >= size);
oi = fd == -1 ? 0 : off + m;
sz = size - m;
dm = sys_mmap(addr + m, sz, prot, f, fd, oi);
if (dm.addr == MAP_FAILED) return MAP_FAILED;
iscow = (flags & MAP_TYPE) != MAP_SHARED && fd != -1;
readonlyfile = (flags & MAP_TYPE) == MAP_SHARED && fd != -1 &&
(g_fds.p[fd].flags & O_ACCMODE) == O_RDONLY;
2021-10-15 02:36:49 +00:00
if (TrackMemoryInterval(&_mmi, x + (n - 1), x + (n - 1), dm.maphandle, prot,
flags, readonlyfile, iscow, oi, sz) == -1) {
OnUnrecoverableMmapError("MapMemories unrecoverable #1");
2021-10-15 02:36:49 +00:00
}
for (i = 0; i < m; i += FRAMESIZE) {
oi = fd == -1 ? 0 : off + i;
sz = FRAMESIZE;
dm = sys_mmap(addr + i, sz, prot, f, fd, oi);
2021-10-15 02:36:49 +00:00
if (dm.addr == MAP_FAILED ||
TrackMemoryInterval(&_mmi, x + i / FRAMESIZE, x + i / FRAMESIZE,
dm.maphandle, prot, flags, readonlyfile, iscow, oi,
sz) == -1) {
OnUnrecoverableMmapError("MapMemories unrecoverable #2");
2021-10-15 02:36:49 +00:00
}
}
if (weaken(__asan_map_shadow) && !OverlapsShadowSpace(addr, size)) {
weaken(__asan_map_shadow)((intptr_t)addr, size);
}
return addr;
}
static noasan inline void *Mmap(void *addr, size_t size, int prot, int flags,
int fd, int64_t off) {
#if defined(SYSDEBUG) && (_KERNTRACE || _NTTRACE)
if (IsWindows()) {
STRACE("mmap(%p, %'zu, %s, %s, %d, %'ld) → ...", addr, size,
DescribeProtFlags(prot), DescribeMapFlags(flags), fd, off);
}
#endif
char *p = addr;
2020-06-15 14:18:57 +00:00
struct DirectMap dm;
int a, b, i, f, m, n, x;
bool needguard, clashes;
size_t virtualused, virtualneed;
if (UNLIKELY(!size)) {
STRACE("size=0");
return VIP(einval());
}
if (UNLIKELY(!IsLegalPointer(p))) {
STRACE("p isn't 48-bit");
return VIP(einval());
}
if (UNLIKELY(!ALIGNED(p))) {
STRACE("p isn't 64kb aligned");
return VIP(einval());
}
if (UNLIKELY(fd < -1)) {
STRACE("mmap(%.12p, %'zu, fd=%d) EBADF", p, size, fd);
return VIP(ebadf());
}
if (UNLIKELY(!((fd != -1) ^ !!(flags & MAP_ANONYMOUS)))) {
STRACE("fd anonymous mismatch");
return VIP(einval());
}
if (UNLIKELY(!(!!(flags & MAP_PRIVATE) ^ !!(flags & MAP_SHARED)))) {
STRACE("MAP_SHARED ^ MAP_PRIVATE");
return VIP(einval());
}
if (UNLIKELY(off < 0)) {
STRACE("neg off");
return VIP(einval());
}
if (UNLIKELY(!ALIGNED(off))) {
STRACE("p isn't 64kb aligned");
return VIP(einval());
}
if (fd == -1) {
size = ROUNDUP(size, FRAMESIZE);
if (IsWindows()) {
prot |= PROT_WRITE; /* kludge */
}
} else if (__isfdkind(fd, kFdZip)) {
STRACE("fd is zipos handle");
return VIP(einval());
}
if (UNLIKELY(!IsLegalSize(size))) {
STRACE("size isn't 48-bit");
return VIP(einval());
}
if (UNLIKELY(INT64_MAX - size < off)) {
STRACE("too large");
return VIP(einval());
}
if (__virtualmax < LONG_MAX &&
(__builtin_add_overflow((virtualused = GetMemtrackSize(&_mmi)), size,
&virtualneed) ||
virtualneed > __virtualmax)) {
STRACE("%'zu size + %'zu inuse exceeds virtual memory limit %'zu", size,
virtualused, __virtualmax);
return VIP(enomem());
}
clashes = OverlapsImageSpace(p, size) || OverlapsExistingMapping(p, size);
if ((flags & MAP_FIXED_NOREPLACE) == MAP_FIXED_NOREPLACE && clashes) {
STRACE("noreplace overlaps existing");
return VIP(eexist());
}
if (__builtin_add_overflow((int)(size >> 16), (int)!!(size & (FRAMESIZE - 1)),
&n)) {
STRACE("memory range overflows");
return VIP(einval());
}
// if size is a two power then automap will use it as alignment
if (IS2POW(size)) {
a = size >> 16;
if (!a) {
a = 1;
2020-09-03 12:44:37 +00:00
}
} else {
a = 1;
}
f = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
if (flags & MAP_FIXED) {
x = FRAME(p);
if (IsWindows()) {
if (UntrackMemoryIntervals(p, size)) {
OnUnrecoverableMmapError("FIXED UNTRACK FAILED");
}
}
} else if (p && !clashes && !OverlapsArenaSpace(p, size) &&
!OverlapsShadowSpace(p, size)) {
x = FRAME(p);
} else if (!Automap(n, a, &x)) {
STRACE("automap has no room for %d frames with %d alignment", n, a);
return VIP(enomem());
}
needguard = false;
p = (char *)ADDR(x);
if ((f & MAP_TYPE) == MAP_STACK) {
if (~f & MAP_ANONYMOUS) {
STRACE("MAP_STACK must be anonymous");
return VIP(einval());
}
f &= ~MAP_TYPE;
f |= MAP_PRIVATE;
if (IsOpenbsd()) { // openbsd:dubstack
// on openbsd this is less about scalability of threads, and more
// about defining the legal intervals for the RSP register. sadly
// openbsd doesn't let us create a new fixed stack mapping. but..
// openbsd does allow us to overwrite existing fixed mappings, to
// authorize its usage as a stack.
if (sys_mmap(p, size, prot, f, fd, off).addr == MAP_FAILED) {
return MAP_FAILED;
}
f |= MAP_STACK_openbsd;
needguard = true;
} else if (IsLinux()) {
// by default MAP_GROWSDOWN will auto-allocate 10mb of pages. it's
// supposed to stop growing if an adjacent allocation exists, to
// prevent your stacks from overlapping on each other. we're not
// able to easily assume a mapping beneath this one exists. even
// if we could, the linux kernel requires for muh security reasons
// that stacks be at least 1mb away from each other, so it's not
// possible to avoid this call if our goal is to have 60kb stacks
// with 4kb guards like a sane multithreaded production system.
// however this 1mb behavior oddly enough is smart enough to not
// apply if the mapping is a manually-created guard page.
if ((dm = sys_mmap(p + size - PAGESIZE, PAGESIZE, prot,
f | MAP_GROWSDOWN_linux, fd, off))
.addr == MAP_FAILED) {
return MAP_FAILED;
}
sys_mmap(p, PAGESIZE, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0);
dm.addr = p;
return FinishMemory(p, size, prot, flags, fd, off, f, x, n, dm);
} else {
if (IsFreebsd()) {
f |= MAP_STACK_freebsd;
}
needguard = true;
2020-09-03 12:44:37 +00:00
}
2020-06-15 14:18:57 +00:00
}
if (!IsWindows()) {
p = MapMemory(p, size, prot, flags, fd, off, f, x, n);
} else {
p = MapMemories(p, size, prot, flags, fd, off, f, x, n);
}
if (p != MAP_FAILED) {
if (needguard) {
if (IsWindows()) _spunlock(&_mmi.lock);
mprotect(p, PAGESIZE, PROT_NONE);
if (IsAsan()) {
__repstosb((void *)(((intptr_t)p >> 3) + 0x7fff8000),
kAsanStackOverflow, PAGESIZE / 8);
}
if (IsWindows()) _spinlock(&_mmi.lock);
}
}
return p;
}
/**
* Beseeches system for page-table entries, e.g.
*
* char *m;
* m = mmap(NULL, FRAMESIZE, PROT_READ | PROT_WRITE,
* MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
* munmap(m, FRAMESIZE);
*
* @param addr should be 0 to let your memory manager choose address;
* unless MAP_FIXED or MAP_FIXED_NOREPLACE are specified in flags
* in which case this function will do precicely as you ask, even
* if p=0 (in which you need -fno-delete-null-pointer-checks); it
* needs to be 64kb aligned because it's a wise choice that sadly
* needs to be made mandatory because of Windows although you can
* use __sys_mmap() to circumvent it on System Five in which case
* runtime support services, e.g. asan memory safety, could break
* @param size must be >0 and needn't be a multiple of FRAMESIZE, but
* will be rounded up to FRAMESIZE automatically if MAP_ANONYMOUS
* is specified
* @param prot can have PROT_READ/PROT_WRITE/PROT_EXEC/PROT_NONE/etc.
* @param flags should have one of the following masked by `MAP_TYPE`
* - `MAP_FILE` in which case `fd != -1` should be the case
* - `MAP_PRIVATE` for copy-on-write behavior of writeable pages
* - `MAP_SHARED` to create shared memory between processes
* - `MAP_STACK` to create a grows-down alloc, where a guard page
* is automatically protected at the bottom: FreeBSD's behavior
* is polyfilled across platforms; uses MAP_GROWSDOWN on Linux
* too for extra oomph (do not use MAP_GROWSDOWN!) and this is
* completely mandatory on OpenBSD but helps perf elsewhere if
* you need to create 10,000 threads. This flag is the reason
* why `STACK_FRAME_UNLIMITED` toil is important, because this
* only allocates a 4096-byte guard page, thus we need the GCC
* compile-time checks to ensure some char[8192] vars will not
* create an undetectable overflow into another thread's stack
* Your `flags` may optionally bitwise or any of the following:
* - `MAP_ANONYMOUS` in which case `fd == -1` should be the case
* - `MAP_FIXED` in which case `addr` becomes more than a hint
* - `MAP_FIXED_NOREPLACE` to protect existing mappings; this is
* always polyfilled by mmap() which tracks its own memory and
* removed before passing to the kernel, in order to support
* old versions; if you believe mappings exist which only the
* kernel knows, then this flag may be passed to sys_mmap() on
* Linux 4.17+ and FreeBSD (where it has multiple bits)
* - `MAP_CONCEAL` is FreeBSD/NetBSD/OpenBSD-only
* - `MAP_NORESERVE` is Linux/XNU/NetBSD-only
* - `MAP_POPULATE` is Linux/FreeBSD-only
* - `MAP_NONBLOCK` is Linux-only
* - `MAP_NOSYNC` is FreeBSD-only
* - `MAP_INHERIT` is NetBSD-only
* - `MAP_LOCKED` is Linux-only
* @param fd is an open()'d file descriptor, whose contents shall be
* made available w/ automatic reading at the chosen address and
* must be -1 if MAP_ANONYMOUS is specified
* @param off specifies absolute byte index of fd's file for mapping,
* should be zero if MAP_ANONYMOUS is specified, and sadly needs
* to be 64kb aligned too
* @return virtual base address of new mapping, or MAP_FAILED w/ errno
*/
noasan void *mmap(void *addr, size_t size, int prot, int flags, int fd,
int64_t off) {
void *res;
_spinlock(&_mmi.lock);
res = Mmap(addr, size, prot, flags, fd, off);
_spunlock(&_mmi.lock);
STRACE("mmap(%p, %'zu, %s, %s, %d, %'ld) → %p% m", addr, size,
DescribeProtFlags(prot), DescribeMapFlags(flags), fd, off, res);
return res;
2020-06-15 14:18:57 +00:00
}