Make fork() go 30% faster

This change makes fork() go nearly as fast as sys_fork() on UNIX. On
Windows, it shaves about 4-5ms off fork() + wait() latency. This is
accomplished by using WriteProcessMemory() from the parent process to set
up the address space of a suspended child process, which is better than
using a pipe.
Justine Tunney 2025-01-01 04:59:38 -08:00
parent 98c5847727
commit 0b3c81dd4e
44 changed files with 769 additions and 649 deletions
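For a concrete picture of the win32 technique named in the commit message, the idea reduces to roughly the sketch below. This is a minimal illustration against the raw win32 API, not the code from this commit (which goes through Cosmopolitan's own NT wrappers); the helper name and its arguments are hypothetical.

// sketch only: spawn the child suspended, let the parent copy memory into it
// with WriteProcessMemory(), then resume it. assumes child_addr is already
// mapped in the child (e.g. part of its image); a real fork() would have to
// reproduce the parent's whole memory map this way.
#include <windows.h>

static BOOL spawn_child_with_copied_memory(const char *exe, void *child_addr,
                                           const void *data, SIZE_T size) {
  STARTUPINFOA si = {sizeof(si)};
  PROCESS_INFORMATION pi;
  // create the child suspended so the parent can set up its memory first
  if (!CreateProcessA(exe, NULL, NULL, NULL, FALSE, CREATE_SUSPENDED, NULL,
                      NULL, &si, &pi))
    return FALSE;
  SIZE_T got = 0;
  BOOL ok = WriteProcessMemory(pi.hProcess, child_addr, data, size, &got) &&
            got == size;
  if (ok)
    ResumeThread(pi.hThread);          // child runs with the copied state
  else
    TerminateProcess(pi.hProcess, 1);  // give up if the copy failed
  CloseHandle(pi.hThread);
  CloseHandle(pi.hProcess);
  return ok;
}

Copying from the parent's side like this avoids marshalling the address space through a pipe for the child to read back in, which, per the commit message, is roughly where the fork() + wait() latency savings come from.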


@@ -19,6 +19,7 @@
#include "libc/calls/calls.h"
#include "libc/calls/internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
@@ -32,6 +33,7 @@
#include "libc/intrin/weaken.h"
#include "libc/limits.h"
#include "libc/macros.h"
#include "libc/nt/enum/memflags.h"
#include "libc/nt/memory.h"
#include "libc/nt/runtime.h"
#include "libc/runtime/runtime.h"
@@ -44,9 +46,10 @@
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/lock.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
#define MMDEBUG 0
#define MMDEBUG 1
#define MAX_SIZE 0x0ff800000000ul
#define MAP_FIXED_NOREPLACE_linux 0x100000
@@ -99,6 +102,31 @@ static bool __maps_overlaps(const char *addr, size_t size) {
return false;
}
// returns true if all fragments of all allocations which overlap
// [addr,addr+size) are completely contained by [addr,addr+size).
textwindows static bool __maps_envelops(const char *addr, size_t size) {
struct Map *map, *next;
size = PGUP(size);
if (!(map = __maps_floor(addr)))
if (!(map = __maps_first()))
return true;
do {
if (MAX(addr, map->addr) >= MIN(addr + size, map->addr + map->size))
break; // didn't overlap mapping
if (!__maps_isalloc(map))
return false; // didn't include first fragment of alloc
if (addr > map->addr)
return false; // excluded leading pages of first fragment
// set map to last fragment in allocation
for (; (next = __maps_next(map)) && !__maps_isalloc(next); map = next)
// fragments within an allocation must be perfectly contiguous
ASSERT(map->addr + map->size == next->addr);
if (addr + size < map->addr + PGUP(map->size))
return false; // excluded trailing pages of allocation
} while ((map = next));
return true;
}
void __maps_check(void) {
#if MMDEBUG
size_t maps = 0;
@@ -130,17 +158,17 @@ static int __muntrack(char *addr, size_t size, struct Map **deleted,
size_t ti = 0;
struct Map *map;
struct Map *next;
struct Map *floor;
size = PGUP(size);
floor = __maps_floor(addr);
for (map = floor; map && map->addr <= addr + size; map = next) {
if (!(map = __maps_floor(addr)))
map = __maps_first();
for (; map && map->addr <= addr + size; map = next) {
next = __maps_next(map);
char *map_addr = map->addr;
size_t map_size = map->size;
if (!(MAX(addr, map_addr) < MIN(addr + size, map_addr + PGUP(map_size))))
continue;
if (addr <= map_addr && addr + size >= map_addr + PGUP(map_size)) {
if (map->precious)
if (map->hand == MAPS_RESERVATION)
continue;
// remove mapping completely
tree_remove(&__maps.maps, &map->tree);
@@ -149,9 +177,6 @@ static int __muntrack(char *addr, size_t size, struct Map **deleted,
__maps.pages -= (map_size + __pagesize - 1) / __pagesize;
__maps.count -= 1;
__maps_check();
} else if (IsWindows()) {
STRACE("you can't carve up memory maps on windows ;_;");
rc = enotsup();
} else if (addr <= map_addr) {
// shave off lefthand side of mapping
ASSERT(addr + size < map_addr + PGUP(map_size));
@@ -229,6 +254,7 @@ void __maps_free(struct Map *map) {
ASSERT(!TAG(map));
map->size = 0;
map->addr = MAP_FAILED;
map->hand = kNtInvalidHandleValue;
for (tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed);;) {
map->freed = (struct Map *)PTR(tip);
if (atomic_compare_exchange_weak_explicit(
@@ -261,11 +287,23 @@ static int __maps_destroy_all(struct Map *list) {
if (!IsWindows()) {
if (sys_munmap(map->addr, map->size))
rc = -1;
} else if (map->hand != -1) {
if (!UnmapViewOfFile(map->addr))
rc = -1;
if (!CloseHandle(map->hand))
rc = -1;
} else {
switch (map->hand) {
case MAPS_SUBREGION:
case MAPS_RESERVATION:
break;
case MAPS_VIRTUAL:
if (!VirtualFree(map->addr, 0, kNtMemRelease))
rc = __winerr();
break;
default:
ASSERT(map->hand > 0);
if (!UnmapViewOfFile(map->addr))
rc = -1;
if (!CloseHandle(map->hand))
rc = -1;
break;
}
}
}
return rc;
@@ -345,10 +383,9 @@ void __maps_insert(struct Map *map) {
if (!map && left && right)
if (__maps_mergeable(left, right)) {
left->size = PGUP(left->size);
right->addr -= left->size;
right->size += left->size;
tree_remove(&__maps.maps, &left->tree);
__maps_free(left);
left->size += right->size;
tree_remove(&__maps.maps, &right->tree);
__maps_free(right);
__maps.count -= 1;
}
@@ -369,7 +406,7 @@ bool __maps_track(char *addr, size_t size, int prot, int flags) {
map->size = size;
map->prot = prot;
map->flags = flags;
map->hand = -1;
map->hand = MAPS_VIRTUAL;
__maps_lock();
__maps_insert(map);
__maps_unlock();
@@ -396,22 +433,23 @@ struct Map *__maps_alloc(void) {
return map;
pthread_pause_np();
}
int size = 65536;
// we're creating sudden surprise memory. the user might be in the
// middle of carefully planning a fixed memory structure. we don't
// want the system allocator to put our surprise memory inside it,
// and we also want to avoid the chances of accidentally unmapping
struct DirectMap sys =
sys_mmap(__maps_randaddr(), size, PROT_READ | PROT_WRITE,
sys_mmap(__maps_randaddr(), MAPS_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (sys.addr == MAP_FAILED)
return 0;
map = sys.addr;
if (IsWindows())
CloseHandle(sys.maphandle);
for (int i = 1; i < size / sizeof(struct Map); ++i)
__maps_free(map + i);
return map;
struct MapSlab *slab = sys.addr;
while (!atomic_compare_exchange_weak(&__maps.slabs, &slab->next, slab)) {
}
for (size_t i = 1; i < ARRAYLEN(slab->maps); ++i)
__maps_free(&slab->maps[i]);
return &slab->maps[0];
}
static int __munmap(char *addr, size_t size) {
@@ -429,13 +467,10 @@ static int __munmap(char *addr, size_t size) {
__maps_lock();
__maps_check();
// normalize size
// abort if size doesn't include all pages in granule
if (GRUP(size) > PGUP(size))
if (__maps_overlaps(addr + PGUP(size), GRUP(size) - PGUP(size))) {
__maps_unlock();
return einval();
}
// on windows we can only unmap whole allocations
if (IsWindows())
if (!__maps_envelops(addr, size))
return enotsup();
// untrack mappings
int rc;
@@ -572,6 +607,11 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
}
} else {
// remove existing mappings and their tracking objects
if (!__maps_envelops(addr, size)) {
__maps_unlock();
__maps_free(map);
return (void *)enotsup();
}
struct Map *deleted = 0;
if (__muntrack(addr, size, &deleted, 0, 0)) {
__maps_insert_all(deleted);
@@ -592,8 +632,7 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
map->size = size;
map->prot = 0;
map->flags = 0;
map->hand = -1;
map->precious = true;
map->hand = MAPS_RESERVATION;
__maps_insert(map);
__maps_unlock();
}
@@ -610,7 +649,6 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
__maps_lock();
tree_remove(&__maps.maps, &map->tree);
__maps.pages -= (map->size + __pagesize - 1) / __pagesize;
map->precious = false;
__maps_unlock();
if (errno == EADDRNOTAVAIL) {
// we've encountered mystery memory
@@ -649,7 +687,6 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
map->prot = prot;
map->flags = flags;
map->hand = res.maphandle;
map->precious = false;
if (IsWindows()) {
map->iscow = (flags & MAP_TYPE) != MAP_SHARED && fd != -1;
map->readonlyfile = (flags & MAP_TYPE) == MAP_SHARED && fd != -1 &&
@@ -710,21 +747,6 @@ static void *__mmap(char *addr, size_t size, int prot, int flags, int fd,
static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
int flags, char *new_addr) {
// normalize and validate old size
// abort if size doesn't include all pages in granule
if (GRUP(old_size) > PGUP(old_size))
if (__maps_overlaps(old_addr + PGUP(old_size),
GRUP(old_size) - PGUP(old_size)))
return (void *)einval();
// validate new size
// abort if size doesn't include all pages in granule
if (flags & MREMAP_FIXED)
if (GRUP(new_size) > PGUP(new_size))
if (__maps_overlaps(new_addr + PGUP(new_size),
GRUP(new_size) - PGUP(new_size)))
return (void *)einval();
// allocate object for tracking new mapping
struct Map *map;
if (!(map = __maps_alloc()))
@@ -787,6 +809,7 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
map->off = old_off;
map->prot = old_prot;
map->flags = old_flags;
map->hand = kNtInvalidHandleValue;
__maps_insert(map);
return res;
@@ -945,8 +968,8 @@ static void *__mremap(char *old_addr, size_t old_size, size_t new_size,
*
* @raise ENOMEM if `RUSAGE_AS` or similar limits are exceeded
* @raise EEXIST if `flags` has `MAP_FIXED_NOREPLACE` and `addr` is used
* @raise ENOTSUP if interval overlapped without enveloping win32 alloc
* @raise EPERM if `addr` is null and `flags` has `MAP_FIXED`
* @raise ENOTSUP if memory map is cleaved on windows with `MAP_FIXED`
* @raise EINVAL if `addr` isn't granularity aligned with `MAP_FIXED`
* @raise EINVAL if `size` is zero
* @raise EINVAL if `flags` or `prot` hold invalid values
@@ -1000,10 +1023,9 @@ void *mremap(void *old_addr, size_t old_size, size_t new_size, int flags, ...) {
*
* @return 0 on success, or -1 w/ errno.
* @raise ENOMEM if OOM happened when punching hole in existing mapping
* @raise ENOTSUP if memory map is cleaved on windows with `MAP_FIXED`
* @raise ENOTSUP if interval overlapped without enveloping win32 alloc
* @raise EDEADLK if called from signal handler interrupting mmap()
* @raise EINVAL if `addr` isn't granularity aligned
* @raise EINVAL if `size` didn't include all pages in granule
*/
int munmap(void *addr, size_t size) {
int rc = __munmap(addr, size);