cosmopolitan/libc/intrin/mmap.c
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2024 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/calls/calls.h"
#include "libc/calls/internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/describebacktrace.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/directmap.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/maps.h"
#include "libc/intrin/strace.h"
#include "libc/intrin/tree.h"
#include "libc/intrin/weaken.h"
#include "libc/nt/memory.h"
#include "libc/nt/runtime.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/zipos.internal.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/sysparam.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/mremap.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
#define MMDEBUG IsModeDbg()
#define MAX_SIZE 0x0ff800000000ul
#define MAX_TRIES 50
#define MAP_FIXED_NOREPLACE_linux 0x100000
#define PGUP(x) (((x) + pagesz - 1) & -pagesz)
#define MASQUE 0x00fffffffffffff8
#define PTR(x) ((uintptr_t)(x) & MASQUE)
#define TAG(x) ROL((uintptr_t)(x) & ~MASQUE, 8)
#define ABA(p, t) ((uintptr_t)(p) | (ROR((uintptr_t)(t), 8) & ~MASQUE))
#define ROL(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#define ROR(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
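// __maps.freed is a lock-free LIFO list of recyclable Map objects. To
// dodge the ABA problem, each head word packs a pointer together with
// an 11-bit counter stored in the bits a Map pointer can't occupy: the
// low three bits (Map objects are 8-byte aligned) and the top byte
// (user pointers fit in 56 bits on the platforms we target). PTR()
// extracts the pointer, TAG() recovers the counter, and ABA() packs a
// pointer with a bumped counter into a single word fit for compare and
// swap. See __maps_alloc() and __maps_free() below.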
#if !MMDEBUG
#define ASSERT(x) (void)0
#else
#define ASSERT(x) \
do { \
if (!(x)) { \
char bt[160]; \
struct StackFrame *bp = __builtin_frame_address(0); \
kprintf("%!s:%d: assertion failed: %!s\n", __FILE__, __LINE__, #x); \
kprintf("bt %!s\n", _DescribeBacktrace(bt, bp)); \
__print_maps(0); \
__builtin_trap(); \
} \
} while (0)
#endif
int __maps_compare(const struct Tree *ra, const struct Tree *rb) {
const struct Map *a = (const struct Map *)MAP_TREE_CONTAINER(ra);
const struct Map *b = (const struct Map *)MAP_TREE_CONTAINER(rb);
return (a->addr > b->addr) - (a->addr < b->addr);
}
privileged optimizespeed struct Map *__maps_floor(const char *addr) {
struct Tree *node;
if ((node = tree_floor(__maps.maps, addr, __maps_search)))
return MAP_TREE_CONTAINER(node);
return 0;
}
static bool __maps_overlaps(const char *addr, size_t size, int pagesz) {
struct Map *map, *floor = __maps_floor(addr);
for (map = floor; map && map->addr <= addr + size; map = __maps_next(map))
if (MAX(addr, map->addr) <
MIN(addr + PGUP(size), map->addr + PGUP(map->size)))
return true;
return false;
}
void __maps_check(void) {
#if MMDEBUG
size_t maps = 0;
size_t pages = 0;
int pagesz = __pagesize;
static unsigned mono;
unsigned id = ++mono;
for (struct Map *map = __maps_first(); map; map = __maps_next(map)) {
ASSERT(map->addr != MAP_FAILED);
ASSERT(map->visited != id);
ASSERT(map->size);
map->visited = id;
pages += (map->size + pagesz - 1) / pagesz;
maps += 1;
struct Map *next;
if ((next = __maps_next(map))) {
ASSERT(map->addr < next->addr);
ASSERT(MAX(map->addr, next->addr) >=
MIN(map->addr + PGUP(map->size), next->addr + PGUP(next->size)));
}
}
  ASSERT(maps == __maps.count);
ASSERT(pages == __maps.pages);
#endif
}
static int __muntrack(char *addr, size_t size, int pagesz,
struct Map **deleted) {
int rc = 0;
struct Map *map;
struct Map *next;
struct Map *floor;
StartOver:
floor = __maps_floor(addr);
for (map = floor; map && map->addr <= addr + size; map = next) {
next = __maps_next(map);
char *map_addr = map->addr;
size_t map_size = map->size;
if (!(MAX(addr, map_addr) <
MIN(addr + PGUP(size), map_addr + PGUP(map_size))))
continue;
if (addr <= map_addr && addr + PGUP(size) >= map_addr + PGUP(map_size)) {
// remove mapping completely
tree_remove(&__maps.maps, &map->tree);
map->freed = *deleted;
*deleted = map;
__maps.pages -= (map_size + pagesz - 1) / pagesz;
__maps.count -= 1;
__maps_check();
} else if (IsWindows()) {
STRACE("you can't carve up memory maps on windows ;_;");
rc = einval();
} else if (addr <= map_addr) {
// shave off lefthand side of mapping
ASSERT(addr + PGUP(size) < map_addr + PGUP(map_size));
size_t left = addr + PGUP(size) - map_addr;
size_t right = map_size - left;
ASSERT(right > 0);
ASSERT(left > 0);
struct Map *leftmap;
if ((leftmap = __maps_alloc())) {
if (leftmap == MAPS_RETRY)
goto StartOver;
map->addr += left;
map->size = right;
if (!(map->flags & MAP_ANONYMOUS))
map->off += left;
__maps.pages -= (left + pagesz - 1) / pagesz;
leftmap->addr = map_addr;
leftmap->size = left;
leftmap->freed = *deleted;
*deleted = leftmap;
__maps_check();
} else {
rc = -1;
}
} else if (addr + PGUP(size) >= map_addr + PGUP(map_size)) {
// shave off righthand side of mapping
size_t left = addr - map_addr;
size_t right = map_addr + map_size - addr;
struct Map *rightmap;
if ((rightmap = __maps_alloc())) {
if (rightmap == MAPS_RETRY)
goto StartOver;
map->size = left;
__maps.pages -= (right + pagesz - 1) / pagesz;
rightmap->addr = addr;
rightmap->size = right;
rightmap->freed = *deleted;
*deleted = rightmap;
__maps_check();
} else {
rc = -1;
}
} else {
// punch hole in mapping
size_t left = addr - map_addr;
size_t middle = PGUP(size);
size_t right = map_size - middle - left;
struct Map *leftmap;
if ((leftmap = __maps_alloc())) {
if (leftmap == MAPS_RETRY)
goto StartOver;
struct Map *middlemap;
if ((middlemap = __maps_alloc())) {
if (middlemap == MAPS_RETRY) {
__maps_free(leftmap);
goto StartOver;
}
leftmap->addr = map_addr;
leftmap->size = left;
leftmap->off = map->off;
leftmap->prot = map->prot;
leftmap->flags = map->flags;
map->addr += left + middle;
map->size = right;
if (!(map->flags & MAP_ANONYMOUS))
map->off += left + middle;
tree_insert(&__maps.maps, &leftmap->tree, __maps_compare);
__maps.pages -= (middle + pagesz - 1) / pagesz;
__maps.count += 1;
middlemap->addr = addr;
middlemap->size = size;
middlemap->freed = *deleted;
*deleted = middlemap;
__maps_check();
} else {
__maps_free(leftmap);
rc = -1;
}
} else {
rc = -1;
}
}
}
return rc;
}
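// Returns a Map object to the lock-free freelist. Together with the
// pop in __maps_alloc() this forms a Treiber stack: every successful
// CAS on __maps.freed also bumps the tag packed by ABA(), so a head
// pointer that was popped and pushed back by another thread can't be
// mistaken for an unchanged one.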
void __maps_free(struct Map *map) {
uintptr_t tip;
ASSERT(!TAG(map));
map->size = 0;
map->addr = MAP_FAILED;
for (tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed);;) {
map->freed = (struct Map *)PTR(tip);
if (atomic_compare_exchange_weak_explicit(
&__maps.freed, &tip, ABA(map, TAG(tip) + 1), memory_order_release,
memory_order_relaxed))
break;
pthread_pause_np();
}
}
static void __maps_free_all(struct Map *list) {
struct Map *next;
for (struct Map *map = list; map; map = next) {
next = map->freed;
__maps_free(map);
}
}
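// Adds a mapping to the rbtree, coalescing adjacent anonymous mappings
// that carry identical prot and flags. For instance (a sketch assuming
// 4096-byte pages) inserting [0x2000,0x3000) next to a compatible
// [0x1000,0x2000) yields a single [0x1000,0x3000) node, which keeps
// the tree small.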
void __maps_insert(struct Map *map) {
map->flags &= MAP_TYPE | MAP_ANONYMOUS | MAP_NOFORK;
// coalesce adjacent mappings
if (!IsWindows() && (map->flags & MAP_ANONYMOUS)) {
    int prot = map->prot;
int flags = map->flags;
bool coalesced = false;
struct Map *floor, *other, *last = 0;
for (other = floor = __maps_floor(map->addr);
other && other->addr <= map->addr + map->size;
last = other, other = __maps_next(other)) {
if (prot == other->prot && flags == other->flags) {
if (!coalesced) {
if (map->addr == other->addr + other->size) {
__maps.pages += (map->size + __pagesize - 1) / __pagesize;
other->size += map->size;
__maps_free(map);
__maps_check();
coalesced = true;
} else if (map->addr + map->size == other->addr) {
__maps.pages += (map->size + __pagesize - 1) / __pagesize;
other->addr -= map->size;
other->size += map->size;
__maps_free(map);
__maps_check();
coalesced = true;
}
}
if (last && other->addr == last->addr + last->size) {
other->addr -= last->size;
other->size += last->size;
tree_remove(&__maps.maps, &last->tree);
__maps.count -= 1;
__maps_free(last);
__maps_check();
}
}
}
if (coalesced)
return;
}
// otherwise insert new mapping
__maps.pages += (map->size + __pagesize - 1) / __pagesize;
__maps_add(map);
__maps_check();
}
static void __maps_track_insert(struct Map *map, char *addr, size_t size,
uintptr_t map_handle) {
map->addr = addr;
map->size = size;
map->prot = PROT_READ | PROT_WRITE;
map->flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NOFORK;
map->hand = map_handle;
__maps_lock();
__maps_insert(map);
__maps_unlock();
}
bool __maps_track(char *addr, size_t size) {
struct Map *map;
do {
if (!(map = __maps_alloc()))
return false;
} while (map == MAPS_RETRY);
__maps_track_insert(map, addr, size, -1);
return true;
}
struct Map *__maps_alloc(void) {
struct Map *map;
uintptr_t tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed);
while ((map = (struct Map *)PTR(tip))) {
if (atomic_compare_exchange_weak_explicit(
&__maps.freed, &tip, ABA(map->freed, TAG(tip) + 1),
memory_order_acquire, memory_order_relaxed))
return map;
pthread_pause_np();
}
int gransz = __gransize;
struct DirectMap sys = sys_mmap(0, gransz, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (sys.addr == MAP_FAILED)
return 0;
map = sys.addr;
__maps_track_insert(map, sys.addr, gransz, sys.maphandle);
for (int i = 1; i < gransz / sizeof(struct Map); ++i)
__maps_free(map + i);
return MAPS_RETRY;
}
static int __munmap(char *addr, size_t size) {
// validate arguments
int pagesz = __pagesize;
int gransz = __gransize;
if (((uintptr_t)addr & (gransz - 1)) || //
!size || (uintptr_t)addr + size < size)
return einval();
// lock the memory manager
// abort on reentry due to signal handler
if (__maps_lock()) {
__maps_unlock();
return edeadlk();
}
__maps_check();
// normalize size
// abort if size doesn't include all pages in granule
size_t pgup_size = (size + pagesz - 1) & -pagesz;
size_t grup_size = (size + gransz - 1) & -gransz;
if (grup_size > pgup_size)
if (__maps_overlaps(addr + pgup_size, grup_size - pgup_size, pagesz)) {
__maps_unlock();
return einval();
}
// untrack mappings
int rc;
struct Map *deleted = 0;
rc = __muntrack(addr, pgup_size, pagesz, &deleted);
__maps_unlock();
// delete mappings
for (struct Map *map = deleted; map; map = map->freed) {
if (!IsWindows()) {
if (sys_munmap(map->addr, map->size))
rc = -1;
} else if (map->hand != -1) {
ASSERT(!((uintptr_t)map->addr & (gransz - 1)));
if (!UnmapViewOfFile(map->addr))
rc = -1;
if (!CloseHandle(map->hand))
rc = -1;
}
}
  // recycle deleted map objects
__maps_free_all(deleted);
return rc;
}
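// Returns a random granularity-aligned address. The masks confine
// picks to roughly [0x004000000000, 0x3fffffffffff]: 46 bits of
// entropy with bit 38 forced on, which presumably steers allocations
// away from the low addresses where program images and early mappings
// tend to live.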
void *__maps_randaddr(void) {
uintptr_t addr;
addr = _rand64();
addr &= 0x3fffffffffff;
addr |= 0x004000000000;
addr &= -__gransize;
return (void *)addr;
}
void *__maps_pickaddr(size_t size) {
char *addr;
for (int try = 0; try < MAX_TRIES; ++try) {
addr = atomic_exchange_explicit(&__maps.pick, 0, memory_order_acq_rel);
if (!addr)
addr = __maps_randaddr();
__maps_lock();
bool overlaps = __maps_overlaps(addr, size, __pagesize);
__maps_unlock();
if (!overlaps) {
atomic_store_explicit(&__maps.pick,
                            addr + ((size + __gransize - 1) & -__gransize),
memory_order_release);
return addr;
}
}
return 0;
}
static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
int64_t off, int pagesz, int gransz) {
// allocate Map object
struct Map *map;
do {
if (!(map = __maps_alloc()))
return MAP_FAILED;
} while (map == MAPS_RETRY);
// polyfill nuances of fixed mappings
int sysflags = flags;
bool noreplace = false;
bool should_untrack = false;
if (flags & MAP_FIXED_NOREPLACE) {
if (flags & MAP_FIXED) {
__maps_free(map);
return (void *)einval();
}
sysflags &= ~MAP_FIXED_NOREPLACE;
if (IsLinux()) {
noreplace = true;
sysflags |= MAP_FIXED_NOREPLACE_linux;
} else if (IsFreebsd() || IsNetbsd()) {
sysflags |= MAP_FIXED;
if (__maps_overlaps(addr, size, pagesz)) {
__maps_free(map);
return (void *)eexist();
}
} else {
noreplace = true;
}
} else if (flags & MAP_FIXED) {
should_untrack = true;
}
// remove mapping we blew away
if (IsWindows() && should_untrack)
__munmap(addr, size);
// obtain mapping from operating system
int olderr = errno;
int tries = MAX_TRIES;
struct DirectMap res;
TryAgain:
res = sys_mmap(addr, size, prot, sysflags, fd, off);
if (res.addr == MAP_FAILED) {
if (IsWindows() && errno == EADDRNOTAVAIL) {
if (noreplace) {
errno = EEXIST;
} else if (should_untrack) {
errno = ENOMEM;
} else if (--tries && (addr = __maps_pickaddr(size))) {
errno = olderr;
goto TryAgain;
} else {
errno = ENOMEM;
}
}
__maps_free(map);
return MAP_FAILED;
}
  // polyfill map fixed noreplace
  // we assume non-linux kernels return addr if it's free
  // that's what linux (e.g. rhel7) did before noreplace existed
if (noreplace && res.addr != addr) {
if (!IsWindows()) {
sys_munmap(res.addr, size);
} else {
UnmapViewOfFile(res.addr);
CloseHandle(res.maphandle);
}
__maps_free(map);
return (void *)eexist();
}
// untrack mapping we blew away
if (!IsWindows() && should_untrack) {
struct Map *deleted = 0;
__muntrack(res.addr, size, pagesz, &deleted);
__maps_free_all(deleted);
}
// track map object
map->addr = res.addr;
map->size = size;
map->off = off;
map->prot = prot;
map->flags = flags;
map->hand = res.maphandle;
if (IsWindows()) {
map->iscow = (flags & MAP_TYPE) != MAP_SHARED && fd != -1;
map->readonlyfile = (flags & MAP_TYPE) == MAP_SHARED && fd != -1 &&
(g_fds.p[fd].flags & O_ACCMODE) == O_RDONLY;
}
__maps_lock();
__maps_insert(map);
__maps_unlock();
return res.addr;
}
static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
int64_t off, int pagesz, int gransz) {
// validate file map args
if (!(flags & MAP_ANONYMOUS)) {
if (off & (gransz - 1))
return (void *)einval();
if (IsWindows()) {
if (!__isfdkind(fd, kFdFile))
return (void *)eacces();
if ((g_fds.p[fd].flags & O_ACCMODE) == O_WRONLY)
return (void *)eacces();
}
}
// try to pick our own addresses on windows which are higher up in the
// vaspace. this is important so that conflicts are less likely, after
// forking when resurrecting mappings, because win32 has a strong pref
// with lower memory addresses which may get assigned to who knows wut
if (IsWindows() && !addr)
if (!(addr = __maps_pickaddr(size)))
return (void *)enomem();
return __mmap_chunk(addr, size, prot, flags, fd, off, pagesz, gransz);
}
static void *__mmap(char *addr, size_t size, int prot, int flags, int fd,
int64_t off) {
char *res;
int pagesz = __pagesize;
int gransz = __gransize;
// validate arguments
if ((uintptr_t)addr & (gransz - 1))
addr = NULL;
if (!addr && (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)))
return (void *)eperm();
if ((intptr_t)addr < 0)
return (void *)enomem();
if (!size || (uintptr_t)addr + size < size)
return (void *)einval();
if (size > MAX_SIZE)
return (void *)enomem();
if (__maps.count * pagesz + size > __virtualmax)
return (void *)enomem();
  // create memory mapping
if (!__isfdkind(fd, kFdZip)) {
res = __mmap_impl(addr, size, prot, flags, fd, off, pagesz, gransz);
} else {
res = _weaken(__zipos_mmap)(
addr, size, prot, flags,
(struct ZiposHandle *)(uintptr_t)g_fds.p[fd].handle, off);
}
return res;
}
static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
int flags, char *new_addr, int pagesz, int gransz) {
// normalize and validate old size
// abort if size doesn't include all pages in granule
size_t pgup_old_size = (old_size + pagesz - 1) & -pagesz;
size_t grup_old_size = (old_size + gransz - 1) & -gransz;
if (grup_old_size > pgup_old_size)
if (__maps_overlaps(old_addr + pgup_old_size, grup_old_size - pgup_old_size,
pagesz))
return (void *)einval();
old_size = pgup_old_size;
// validate new size
// abort if size doesn't include all pages in granule
if (flags & MREMAP_FIXED) {
size_t pgup_new_size = (new_size + pagesz - 1) & -pagesz;
size_t grup_new_size = (new_size + gransz - 1) & -gransz;
if (grup_new_size > pgup_new_size)
if (__maps_overlaps(new_addr + pgup_new_size,
grup_new_size - pgup_new_size, pagesz))
return (void *)einval();
}
// allocate object for tracking new mapping
struct Map *map;
do {
if (!(map = __maps_alloc()))
return (void *)enomem();
} while (map == MAPS_RETRY);
// check old interval is fully contained within one mapping
struct Map *old_map;
if (!(old_map = __maps_floor(old_addr)) ||
old_addr + old_size > old_map->addr + PGUP(old_map->size) ||
old_addr < old_map->addr) {
__maps_free(map);
return (void *)efault();
}
// save old properties
  int64_t old_off = old_map->off;
int old_prot = old_map->prot;
int old_flags = old_map->flags;
// netbsd mremap fixed returns enoent rather than unmapping old pages
if (IsNetbsd() && (flags & MREMAP_FIXED))
if (__munmap(new_addr, new_size)) {
__maps_free(map);
return MAP_FAILED;
}
// release lock before system call if possible
if (!flags)
__maps_unlock();
// the time has come
char *res;
if (IsNetbsd()) {
int sysfl = (flags & MREMAP_FIXED) ? MAP_FIXED : 0;
res = sys_mremap(old_addr, old_size, (uintptr_t)new_addr, new_size, sysfl);
} else {
res = sys_mremap(old_addr, old_size, new_size, flags, (uintptr_t)new_addr);
}
if (res == MAP_FAILED)
__maps_free(map);
// re-acquire lock if needed
if (!flags)
__maps_lock();
if (res == MAP_FAILED)
return MAP_FAILED;
if (!(flags & MREMAP_MAYMOVE))
ASSERT(res == old_addr);
// untrack old mapping
struct Map *deleted = 0;
__muntrack(old_addr, old_size, pagesz, &deleted);
__maps_free_all(deleted);
// track map object
map->addr = res;
map->size = new_size;
map->off = old_off;
map->prot = old_prot;
map->flags = old_flags;
__maps_insert(map);
return res;
}
static void *__mremap(char *old_addr, size_t old_size, size_t new_size,
int flags, char *new_addr) {
int pagesz = __pagesize;
int gransz = __gransize;
// kernel support
if (!IsLinux() && !IsNetbsd())
return (void *)enosys();
  // drop MREMAP_MAYMOVE if the mapping is shrinking in place
if (new_size <= old_size)
if (!(flags & MREMAP_FIXED))
if (flags & MREMAP_MAYMOVE)
flags &= ~MREMAP_MAYMOVE;
// we support these flags
if (flags & ~(MREMAP_MAYMOVE | MREMAP_FIXED))
return (void *)einval();
if (IsNetbsd() && !(flags & MREMAP_MAYMOVE) &&
((new_size + pagesz - 1) & -pagesz) > old_size)
return (void *)enotsup();
if ((flags & MREMAP_FIXED) && !(flags & MREMAP_MAYMOVE))
return (void *)einval();
// addresses must be granularity aligned
if ((uintptr_t)old_addr & (gransz - 1))
return (void *)einval();
if (flags & MREMAP_FIXED)
if ((uintptr_t)new_addr & (gransz - 1))
return (void *)einval();
// sizes must not be zero
if (!old_size)
return (void *)einval();
if (!new_size)
return (void *)einval();
// check for big size
if (old_size > MAX_SIZE)
return (void *)enomem();
if (new_size > MAX_SIZE)
return (void *)enomem();
// check for overflow
if ((uintptr_t)old_addr + old_size < old_size)
return (void *)enomem();
if (flags & MREMAP_FIXED)
if ((uintptr_t)new_addr + new_size < new_size)
return (void *)enomem();
// old and new intervals must not overlap
if (flags & MREMAP_FIXED)
if (MAX(old_addr, new_addr) <
MIN(old_addr + old_size, new_addr + PGUP(new_size)))
return (void *)einval();
// memory increase must not exceed RLIMIT_AS
if (PGUP(new_size) > old_size)
if (__maps.count * pagesz - old_size + PGUP(new_size) > __virtualmax)
return (void *)enomem();
// lock the memory manager
// abort on reentry due to signal handler
if (__maps_lock()) {
__maps_unlock();
return (void *)edeadlk();
}
__maps_check();
// perform operation
char *res = __mremap_impl(old_addr, old_size, new_size, flags, new_addr,
pagesz, gransz);
// return result
__maps_unlock();
return res;
}
/**
* Creates memory mapping.
*
* The mmap() function is used by Cosmopolitan's malloc() implementation
* to obtain new memory from the operating system. This function is also
* useful for establishing a mapping between a file on disk and a memory
* address, which avoids most need to call read() and write(). It is how
* executables are loaded into memory, for instance, in which case pages
* are loaded lazily from disk by the operating system.
*
 * The `addr` parameter may be zero. This lets the implementation choose
 * an available address in memory. OSes normally pick a randomized
 * address, for security. Most OSes try to make sure subsequent mapping
 * requests will be adjacent to one another. More paranoid OSes may
 * place mappings sparsely, with unmapped gaps between them. You may
 * not use the `MAP_FIXED` parameter to create a memory map at NULL.
*
* The `addr` parameter may be non-zero, in which case Cosmopolitan will
 * give you a mapping at that specific address if it's available. If a
 * mapping already exists at the requested address, another address will
 * be chosen automatically. On most OSes the newly selected address will
 * be as close by as possible, but that's not guaranteed. If `MAP_FIXED`
* is also supplied in `flags` then this hint is taken as mandatory, and
* existing mappings at the requested interval shall be auto-unmapped.
*
* The `size` parameter is implicitly rounded up to the system page size
* reported by getpagesize() and sysconf(_SC_PAGESIZE). Your extra bytes
* will be zero-initialized.
*
 * The returned address will always be aligned on the system allocation
 * granularity. This value may be obtained from getgransize() or calling
 * sysconf(_SC_GRANSIZE). Granularity is always greater than or equal to
 * the page size. On some platforms, e.g. Windows, it may be larger than
 * the page size.
*
* The `prot` value specifies the memory protection of the mapping. This
 * may be `PROT_NONE` to disallow all access; otherwise it's a bitwise
 * or of the following constants:
*
* - `PROT_READ` allows read access
* - `PROT_WRITE` allows write access
* - `PROT_EXEC` allows execute access
*
 * Some OSes (e.g. OpenBSD) will raise an error if both `PROT_WRITE` and
* `PROT_EXEC` are requested. You may still modify executable memory but
* you must use mprotect() to transition between the two states. On some
* OSes like MacOS ARM64, you need to pass the `MAP_JIT` flag to get RWX
 * memory; that flag is defined as zero on other OSes.
*
* The lower bits of the `flags` parameter specify the `MAP_TYPE`, which
* may be:
*
* - `MAP_PRIVATE` for non-shared and copy-on-write mappings
* - `MAP_SHARED` for memory that may be shared between processes
*
* Your `fd` argument specifies the file descriptor of the open file you
* want to map. This parameter is ignored when `MAP_ANONYMOUS` is passed
* via `flags`.
*
 * Your `off` argument specifies the offset into the file at which the
 * mapped memory shall begin. It must be aligned to the allocation
 * granularity, which may be obtained from getgransize() or
 * sysconf(_SC_GRANSIZE).
*
* The `MAP_FIXED_NOREPLACE` flag may be passed in `flags` which has the
* same behavior as `MAP_FIXED` except it raises `EEXIST` when a mapping
* already exists on the requested interval.
*
* The `MAP_CONCEAL` flag may be passed to prevent a memory mapping from
* appearing in core dumps. This is currently supported on BSD OSes, and
* is ignored on everything else.
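 *
 * Here's a sketch of typical usage (error handling elided; `fd` is
 * assumed to be any readable file descriptor):
 *
 *     char *p = mmap(0, getpagesize(), PROT_READ, MAP_PRIVATE, fd, 0);
 *     if (p != MAP_FAILED) {
 *       // ... read through p ...
 *       munmap(p, getpagesize());
 *     }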
*/
void *mmap(void *addr, size_t size, int prot, int flags, int fd, int64_t off) {
void *res = __mmap(addr, size, prot, flags, fd, off);
STRACE("mmap(%p, %'zu, %s, %s, %d, %'ld) → %p% m (%'zu bytes total)", addr,
size, DescribeProtFlags(prot), DescribeMapFlags(flags), fd, off, res,
__maps.pages * __pagesize);
return res;
}
/**
* Changes memory mapping.
*
* This system call lets you move memory without copying it. It can also
* be used to shrink memory mappings.
*
* This system call is supported on Linux and NetBSD. It's used by Cosmo
* Libc's realloc() implementation under the hood.
*
* The `flags` parameter may have:
*
* - `MREMAP_MAYMOVE` to allow relocation
 * - `MREMAP_FIXED` in which case a fifth parameter supplies the new
 *   address; it must be combined with `MREMAP_MAYMOVE`
*
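 * Here's a hedged sketch of growing an allocation (assumes `p` holds
 * `old_size` bytes from an earlier anonymous mmap() call):
 *
 *     void *q = mremap(p, old_size, new_size, MREMAP_MAYMOVE);
 *     if (q != MAP_FAILED)
 *       p = q;
 *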
*/
void *mremap(void *old_addr, size_t old_size, size_t new_size, int flags, ...) {
va_list ap;
void *new_addr = 0;
if (flags & MREMAP_FIXED) {
va_start(ap, flags);
new_addr = va_arg(ap, void *);
va_end(ap);
}
void *res = __mremap(old_addr, old_size, new_size, flags, new_addr);
STRACE("mremap(%p, %'zu, %'zu, %s, %p) → %p% m (%'zu bytes total)", old_addr,
old_size, new_size, DescribeMremapFlags(flags), new_addr, res,
__maps.pages * __pagesize);
return res;
}
/**
* Removes memory mapping.
*
* The `size` parameter is implicitly rounded up to the page size.
*
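 * For example (a sketch, assuming `p` came from mmap() and that the
 * allocation granularity equals the page size, as on Linux):
 *
 *     munmap(p, 1);  // releases the entire page at p
 *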
* @return 0 on success, or -1 w/ errno.
*/
int munmap(void *addr, size_t size) {
int rc = __munmap(addr, size);
STRACE("munmap(%p, %'zu) → %d% m (%'zu bytes total)", addr, size, rc,
__maps.pages * __pagesize);
return rc;
}
__weak_reference(mmap, mmap64);